{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import copy\n",
    "import warnings\n",
    "warnings.simplefilter(action='ignore', category=FutureWarning)\n",
    "from tools.data_handling import calculate_p_value, latexify_p_value\n",
    "from tools.data_handling import make_OR_plot, make_boxplot, make_lm_plot, rank_INT\n",
    "from tools.file_utilities import make_folder_if_not_exists\n",
    "import statsmodels.api as sm\n",
    "import statsmodels.formula.api as smf\n",
    "import statsmodels.stats.multitest as smm\n",
    "from firthlogist import FirthLogisticRegression\n",
    "import patsy\n",
    "import scipy.stats as stats\n",
    "import qgrid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Signatures to analyse: COSMIC or denovo\n",
    "run_name = 'RCC_961_minadj_stage_bonf_CI'\n",
    "analysis = 'COSMIC'\n",
    "# Mutation type to analyse\n",
    "mutation_types = ['SBS', 'DBS', 'ID']#, 'CNV', 'SV']\n",
    "SBS_context = 1536\n",
    "if analysis=='COSMIC':\n",
    "    if 'SV' in mutation_types:\n",
    "        mutation_types.remove('SV')\n",
    "    SBS_context = 96\n",
    "    \n",
    "only_Balkan_countries = False\n",
    "only_early_stage = False\n",
    "drop_Balkan_countries = False\n",
    "drop_countries_with_less_than_20_cases = False\n",
    "drop_Japan = False\n",
    "drop_Serbian_hypermutator = True\n",
    "\n",
    "# parameters to consider\n",
    "parameters_for_regressions = ['age_group', 'sex', 'stage', #'country',\n",
    "                              'tobacco_ever', #'alcohol_ever', \n",
    "                              'bmi_q', 'hypert', 'diabetes',\n",
    "                              'PFOA_q', #, 'PFOS_q',\n",
    "                              'fam_rcc',\n",
    "                               ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import the metadata\n",
    "data_merged = pd.read_csv('data_merged.csv',index_col=0)\n",
    "\n",
    "data_merged = data_merged.replace('Czech Republic', ' Czechia')\n",
    "data_merged = data_merged.replace('United Kingdom', 'UK')\n",
    "data_merged = data_merged.replace('Ceske Budejovice', 'Ceske B.')\n",
    "\n",
    "if drop_countries_with_less_than_20_cases:\n",
    "    data_merged = data_merged.query(\"country != 'Thailand' and country != 'Poland' and country != 'Lithuania'\")\n",
    "\n",
    "if only_Balkan_countries:\n",
    "    data_merged = data_merged.query(\"country == 'Romania' or country == 'Serbia'\")\n",
    "\n",
    "if drop_Balkan_countries:\n",
    "    data_merged = data_merged.query(\"country != 'Romania' and country != 'Serbia'\")\n",
    "\n",
    "if drop_Japan:\n",
    "    data_merged = data_merged.query(\"country != 'Japan'\")\n",
    "    \n",
    "if only_early_stage:\n",
    "    data_merged = data_merged.query(\"stage == 'I' or stage == 'II'\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import signature attributions\n",
    "merged_sigs_with_metadata = data_merged.copy()\n",
    "for mutation_type in mutation_types:\n",
    "    context = SBS_context if mutation_type=='SBS' else 78 if mutation_type=='DBS' else 83 if mutation_type=='ID' else 32 if mutation_type=='SV' else 48 if mutation_type=='CNV' else 'NA'\n",
    "    mutation_type_context = mutation_type + str(context)\n",
    "    if analysis=='COSMIC':\n",
    "#     v14 COSMIC nominal\n",
    "        sigs_CIs = pd.read_csv('./MSA_attribution/962_Consensus_v3/cosmic_noCI_penalties/SigProfilerExtractor/%s/output_tables/CIs_RCC_Manuscript_COSMIC_%s_bootstrap_output_weights.csv' % (mutation_type_context, mutation_type_context), index_col=0)\n",
    "        sigs_abs = pd.read_csv('./MSA_attribution/962_Consensus_v3/cosmic_noCI_penalties/SigProfilerExtractor/%s/output_tables/output_RCC_Manuscript_COSMIC_%s_mutations_table.csv' % (mutation_type_context, mutation_type_context), index_col=0)\n",
    "        stat_inf = pd.read_csv('./MSA_attribution/962_Consensus_v3/cosmic_noCI_penalties/SigProfilerExtractor/%s/output_tables/output_RCC_Manuscript_COSMIC_%s_stat_info.csv' % (mutation_type_context, mutation_type_context), index_col=0)\n",
    "        sigs_abs = sigs_abs.add_suffix('_abs')\n",
    "    elif analysis=='denovo':\n",
    "        sigs_CIs = pd.read_csv('./MSA_attribution/962_Consensus_v3/denovo_noCI_penalties/SigProfilerExtractor/%s/output_tables/CIs_RCC_Manuscript_denovo_%s_bootstrap_output_weights.csv' % (mutation_type_context, mutation_type_context), index_col=0)\n",
    "        sigs_abs = pd.read_csv('./MSA_attribution/962_Consensus_v3/denovo_noCI_penalties/SigProfilerExtractor/%s/output_tables/output_RCC_Manuscript_denovo_%s_mutations_table.csv' % (mutation_type_context, mutation_type_context), index_col=0)\n",
    "        stat_inf = pd.read_csv('./MSA_attribution/962_Consensus_v3/denovo_noCI_penalties/SigProfilerExtractor/%s/output_tables/output_RCC_Manuscript_denovo_%s_stat_info.csv' % (mutation_type_context, mutation_type_context), index_col=0)\n",
    "        sigs_abs = sigs_abs.add_suffix('_abs')\n",
    "    stat_inf = stat_inf.rename(columns={'Mutational burden': mutation_type + '_burden'})\n",
    "    # merge with metadata\n",
    "    merged_sigs_with_metadata = pd.concat([sigs_CIs, merged_sigs_with_metadata], axis=1)#.fillna(0)\n",
    "    merged_sigs_with_metadata = pd.concat([sigs_abs, merged_sigs_with_metadata], axis=1)#.fillna(0)\n",
    "    merged_sigs_with_metadata = pd.concat([stat_inf[mutation_type + '_burden'], merged_sigs_with_metadata], axis=1)#.fillna(0)\n",
    "merged_sigs_with_metadata.dropna(subset=['country'], inplace=True)\n",
    "# qgrid.show_grid(merged_sigs_with_metadata, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 100})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6b3b3acd05f34243a31837a71b8de393",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': False, 'defa…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# DBS to nan for CAGEKID\n",
    "if 'DBS' in mutation_types:\n",
    "    DBS_cols = [col for col in merged_sigs_with_metadata.columns if 'DBS' in col]\n",
    "    merged_sigs_with_metadata.loc[merged_sigs_with_metadata['Sequencing'] == 'External (CAGEKID)', DBS_cols] = np.nan\n",
    "# rename SV/CNV signatures if present\n",
    "merged_sigs_with_metadata.columns = merged_sigs_with_metadata.columns.str.replace(\"SBSSV\", \"SV_\")\n",
    "merged_sigs_with_metadata.columns = merged_sigs_with_metadata.columns.str.replace(\"SBSCNV\", \"CNV_\")\n",
    "qgrid.show_grid(merged_sigs_with_metadata, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 100})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "for mutation_type in mutation_types:\n",
    "    columns_to_scan = merged_sigs_with_metadata.columns \n",
    "    if mutation_type == 'CNV' and analysis=='COSMIC':\n",
    "        mutation_type = 'CN'\n",
    "    if 'SBS' in mutation_type:\n",
    "        # SP output idiosyncrasy\n",
    "        columns_to_scan = [ col for col in columns_to_scan if \"CNV\" not in col ]\n",
    "        columns_to_scan = [ col for col in columns_to_scan if \"SV\" not in col ]\n",
    "    sigs = [col for col in columns_to_scan if mutation_type in col\n",
    "            and not '_abs' in col and not 'CI' in col and not 'burden' in col]\n",
    "    for signature in sigs:\n",
    "        for sample in merged_sigs_with_metadata.index:\n",
    "            attribution = merged_sigs_with_metadata.loc[sample, signature + '_abs']\n",
    "            CI_string = merged_sigs_with_metadata.loc[sample, signature]\n",
    "            if not pd.isnull(CI_string):\n",
    "                CI_string = CI_string.translate({ord(i):None for i in '[,]'})\n",
    "                central, lower_CI, upper_CI = [float(x) for x in CI_string.split()]\n",
    "                if lower_CI != 0:\n",
    "                    merged_sigs_with_metadata.loc[sample, signature + '_CI'] = attribution\n",
    "                else:\n",
    "                    merged_sigs_with_metadata.loc[sample, signature + '_CI'] = 0\n",
    "            else:\n",
    "                central, lower_CI, upper_CI = [np.nan, np.nan, np.nan]\n",
    "                merged_sigs_with_metadata.loc[sample, signature + '_CI'] = np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          N cases  SBS22 (%) SBS1536I (%) DBS78D (%)  ID83C (%)  \\\n",
      "Romania        64  45 (70.3)    48 (75.0)  42 (65.6)  13 (20.3)   \n",
      "Serbia         69  16 (23.2)    33 (47.8)  11 (15.9)    3 (4.3)   \n",
      "Thailand        5   3 (60.0)     3 (60.0)    0 (0.0)    0 (0.0)   \n",
      "Brazil         96    3 (3.1)      3 (3.1)    1 (1.0)    0 (0.0)   \n",
      "Canada         73    2 (2.7)      0 (0.0)    2 (2.7)    1 (1.4)   \n",
      "Czechia       259    1 (0.4)      5 (1.9)  32 (12.4)    0 (0.0)   \n",
      "UK            115    1 (0.9)      0 (0.0)  31 (27.0)    0 (0.0)   \n",
      "Russia        216    0 (0.0)      1 (0.5)  26 (12.0)    0 (0.0)   \n",
      "Poland         13    0 (0.0)      0 (0.0)    1 (7.7)    0 (0.0)   \n",
      "Lithuania      16    0 (0.0)      1 (6.2)    1 (6.2)    0 (0.0)   \n",
      "Japan          36    0 (0.0)      1 (2.8)    1 (2.8)    0 (0.0)   \n",
      "\n",
      "          SBS22 or SBS1536I (%)    Any (%)  \n",
      "Romania               53 (82.8)  54 (84.4)  \n",
      "Serbia                35 (50.7)  36 (52.2)  \n",
      "Thailand               4 (80.0)   4 (80.0)  \n",
      "Brazil                  3 (3.1)    3 (3.1)  \n",
      "Canada                  2 (2.7)    3 (4.1)  \n",
      "Czechia                 6 (2.3)  37 (14.3)  \n",
      "UK                      1 (0.9)  31 (27.0)  \n",
      "Russia                  1 (0.5)  27 (12.5)  \n",
      "Poland                  0 (0.0)    1 (7.7)  \n",
      "Lithuania               1 (6.2)   2 (12.5)  \n",
      "Japan                   1 (2.8)    1 (2.8)  \n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOAAAADCCAYAAABQSc7cAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAgKklEQVR4nO3dfVxUdd7/8deZAYaRGxEB7wXBRDc0C++vFC1r1/YK3S40byp9rHazKpZaikkIKA8L2dWU69dmN7oPu0HNLndt0dJyyVbZJDRvIBVEQVBAAWVQYJj5/v7gknKvOFAwZ2bi+3w8zuPhnBnOvGecz3zPnO85368ihBBIkmQXOnsHkKSOTBagJNmRLEBJsiNZgJJkR7IAJcmOZAFKkh252DuAJDmjI0eO8Mknn5CUlATAZ599xqFDh7BarSQkJLB9+3by8/NxdXVlxYoVzW5HtoCS9BNdvHiRnJwc6urqmtbt3buX1atXM3z4cI4cOUJWVhZxcXF4eXlx/vz5ZrdlsxbQ2HeGrTbdJv/MfsLeEZr19H5ve0doVvnlBntHUFW4ZEKrHtfS5zL5pTGkpqbesW7hwoVER0c33Q4MDGTu3Lm8+OKLTetcXBpLyd/fn9LS0qbbAQEBlJeXExwc/KPPJ3dBJekHoqOj7yi21tLr9QCUl5fj5+eHTte4c1lWVkZ4eHizfycLUOpQdEr7fuR3795NWFgYv/nNb4iNjUUIQWJiIqWlpSQkJGAwGAgJCWn272UBSh2KTtd+H/mUlJSmf/fv35/x48c33Z46dWqrtiELUOpQFEWxd4Q7yAKUOhSlnXdB28qx0kiSjSmKY/W8yQKUOpT2PgjTVo6VRpJsTLaAkmRHOkVv7wh3+EkFaDKZ8PT0tFUWSbK59uyGaA8tpomJiSE/P7/pFJtdu3ZpkUuSbMLpdkGNRiMLFizAx8eHTz/9VItMkmQzTtcN4eXlhaenJ++88w5Wq1WLTJJkM07VAt66dYuHH36YsLAw7r77bi5evKhVLkmyCUfrhlD9Oli5cmXTdU0uLi68++67WuWSJJtQFEV10Zrq14Gnpydz5syhsLCQ1157DTmGr+TsnKoFDA4O5uDBg/Tt25cpU6Zw4sQJrXJJkk3odC6qi9ZUn3HSpEkcP36c+vp6wsLCSExM1CrXHSLG3M3jk8cwf/lbdnl+gLMnCziw+wgAs1/4HR5eRrZvTqei7DoXzl1i+rO/paqimuILpVSUVfHcyum4Gdw0yTbEz4v/6t8DgHXf5GMyWxjb05cR3X3o6u7G+mPnmXd3Xwx6HUP8vFj2VS55129qku228B7ezBrSE4D4f+Rxo66Buff1ZqCfJ9du1rPunwVYNNjDUhxsFBbVNElJSej1euLi4jCZTHbpAwwO7Ma9YUG4a/Rhbs7nf81k3rKpTHh0JEc+PwbA4888wpOLIvnVvf25d8yvOPn1Ga6VVmGxWDUrPoDHQnqQ9PU5dudf4eG+/gAM9femj6c7nd1cqKozszYrj7dPF7L3YrnmxQcwc0hPVhw4S9qpyzw6IACA8B6deemz7/i2tJqIIF9Ncig6F9VFa6oF6OXlxcSJE1m2bBnJycl3DEKjlfMXS9mw+e+aP++/s1qtuBlc8enqTdW1G03rP/3oKyZOGQPAr6PG8vzqp+je24/yKxWaZdMpUG8VXL1Vj5+xsfAziq+x+Mscvrh0lcFdG8eamRXai7QzJZrl+iG9olBnsVJWU0+AZ2PGj3Ov8NpDoYzs1Rlfo6smORztIIxqAY4aNYqDBw/i6+vLM888w7Vr17TK5XAM7q6Y6xuounaDzr7fD55UfqWSXkHdAEhP+wcAXj6emDRsZWotVlx1Cn5GN67V1gMwK7Q3AqisNdPZ4IKrTkFRoNpsn8GVbjVYcNMrBHi4UV7TmNGg17F8/xm+u1pDcXWtJjl0iovqojXVZxw/fjxlZWVUVlaSnp7OmjVrtMrlcB6IHM3byTux
NFi4Z9RALhVcoWuAD+7G73c1ewV15511H6HT6+gX2luzbB/nXWHl8Ltw0SkcvlxJP+9OHC2tYtXIu7AKeC0rj0AvI5dM2nzIf8wHJ0pYOzEUF51CxoUK7vLthKteR/LDodwyW0k7dVmbIA52RbyiNj/gihUrmD17Nrt27WLy5Mls3ryZjRs3tmrDcljCn04OS/jztXZYwgGj31C9/+yRP7RHnFZTbQFdXV0ZOHAgQgjCwsJwd3fXKpck2YbesVpA1QI0m828+uqrjB49mqysLHkuqOT0hIPtgqoWYEJCApcuXSI4OJjs7GwSEhK0yiVJtuFMLWBmZiYHDhxg3LhxbNu2jZEjRzJ//nytsklS+2tjC5idnc327duBxnOlvb292bp1K2fOnKFr16688MILvPfeexw7dgyj0ciSJUsICAhodnuq3RBpaWksWrSI1NRUtmzZQl5eXpvCS5Ld6RX1pQU7duwgMTGRqKgo0tPTATh27Bhr165l8ODBfPXVV+Tk5ODj44Ovry9+fn6q21MtQE9PT/z8/Bg0aBA6na5pwglJclqKorps2rSJ0NDQO5ZNmzY1/bnFYsFgMODv7095eTkAkydPZuXKlRw9epTKykpmzpxJfHw8gYGBHDp0SDWOakUVFBSwaNEiSkpKmDNnDjdvan8KkyS1qxZauZYmZzEajdTX1zdNwgJQV1dHUlISO3fupEePHpw+fZqhQ4fi7e1NQ4N6941qAQ4bNozhw4eTm5uL0Wjk3LlzqhuTJEcndG37DTht2jTi4uIwm82MHTuWvLw8zGYzL7/8MkajkaioKPLy8oiPj0cIQWxsrOr2VAuwpqaGBx54gMOHD7NgwQKWL1/epvCSZHdtLMCwsDBeffXVO9b179+fyMjIpttPPNH6kz1aLMDPP/+cwMBATCYT1dXVPzGuJDmYNhZge1M9CBMdHU1ZWRkzZ85kz549LF68WKtckmQbOkV90ZhqCxgUFERQUBAAM2Y45rmdkvSTOFgLKPsVpI7FmU5Fk6RfHGc6FU2SfnEcq/5kAUodi9A71qBMsgCljqWj7II66pXn/3Hfe/aO0Kx+8dpejf1TFC7pZe8I7UMehJEkO+ooLaAkOSTZDyhJ9iNkCyhJdiR/A0qSHbnIApQk+5H9gJJkP041LKEk/eLIXVBJsiNnbAFzc3PR6/UMGDDA1nkkybacrRsiMTGRXr16odfr2b17N8uWLdMilyTZRFsHZWpvLRagq6src+fOBeDNN9+0eSBJsilnOgr60ksvUVRUxJIlS3Bzc6O4uJhnn31Wq2yS1P6caRc0ISGB/Px8Bg8eDCCHppecnzPtgsbGxjJkyBB27tzJK6+8wubNm0lOTtYqmyS1u7ZekNuayVm2b99Ofn4+rq6urFixQnV7Lc4NMWfOHObNm8drr72GymS6kuQcdC0sLWjN5CxZWVnExcXh5eXF+fPnW4zTrODgYA4ePEjfvn2ZMmUKJ06caMUrlCQHptepLu0xOcvtSYwCAgKaHtMc1V3QSZMmcfz4cerr6wkLCyMxMbGtL79ZZ08WcGD3EQBmv/A7PLyMbN+cTkXZdS6cu8T0Z39LVUU1xRdKqSir4rmV03EzuNksT2tFjLmbxyePYf7yt+yWYWiAN9NCewCw9l95VNdbiOjjy+iePvgZ3Uj++jz39+pCiE8nunkYWHnoLHUWbWY7zs7OZfv2fQCsXPk03t6erF+/jdLSq+TknGfp0tkUFFwiP/8S1dU1JCUtwsPDaLtALfwGbI/JWXS6xnatrKyM8PBw9ThqdyYlJaHX64mLi8NkMrFr1y7VjbXF53/NZN6yqUx4dCRHPj8GwOPPPMKTiyL51b39uXfMrzj59RmulVZhsVgdoviCA7txb1gQ7nbOMjW0O/GHz7Lr7BUm9WucDPK+bt709TLS2eBKZa2ZMb260N3DgF5RNCs+gB079pGYuICoqIdIT2+cqmvx4ieJiZnHiBGDiYgYRp8+PVi9eiFDh4aS
n19k0zxCr6guLbk9OUtaWhpGo/GOyVm+++47Ro4cybBhw0hISMBkMhESEqK6PdUW0MvLi4kTJ3LfffeRnJxMXV3dT3u1P4HVasXN4IpPV29Of/P9LEyffvQVE6eMAeDXUWMJHdKPD9/4hPIrFfh397VZntY4f7GUDZv/zrsbFtg1h15RqLcIrt6qY1RPHwAOFlawIesC00J7cE+AN+/nlHCs7AaLhwXR08NASY3t/i9/yGKxYjC44e/vS2bm9z9h3nvvE2bMmATAgw+O5OLFEvLyinjqqcjmNtU+2ngQpjWTs0ydOrXV21NNM2rUKA4ePIivry/PPPMM165d+4lxW8/g7oq5voGqazfo7OvdtL78SiW9groBkJ72DwC8fDwxXZdzFd5W22DFVafgZzRw9WY9ALPv7oUAKmrN+BhcmB3WOKhSZa2Zzu6ummUzGg3U15spL6/Az69L0/ri4jJCQvoAcOpUHtu27SEu7rmm3TebUVpYNKb6asePH0/fvn2prKwkPT2dNWvW2CzIA5GjeTt5J5/vPoK70Y1LBVe4VVOLu/H73bteQd15Z91HlF+uoF9ob5tlcTY7zlwm/j/u4vGBPbjZYCHEpxOZl6tIun8A4/r48uWlCvKrbhI3uj+9PN3JvWbSLNu0ab8hLi6VtLR9GI0G8vIKMZlu4uHhDoAQgiVLkqmsrObllzdy7txFm+bR69UXrSlCpW9hxYoVzJ49m127djF58mQ2b97Mxo0bW7Xh7Kt/b7eQ7UkOS/jz5Py+u70jtKB1FwoE/78M1fvPz49ojzCtpvob0NXVlYEDByKEICwsDHd3d61ySZJN6JzpTBiz2cyrr77K6NGjycrKwmrV7uiZJNmCg10O2PK5oJcuXSI4OJjs7GwSEhK0yiVJNqGzw+88NaoHYTIzM9m6dSsHDhzg9ddf5y9/+YtWuSTJJlo4EUZzqk+ZlpbGokWLSE1NZcuWLfJqCMnpKYr6ojXVXVBPT0/8/PwYNGgQOp2u6Rw3SXJWOme6HrCgoIBFixZRUlLCnDlzuHlTdn5Lzs2pDsIMGzaM4cOHk5ubi9Fo5Ny5c2oPlySHZ+sTbX4q1Tg1NTU88MADVFZW8vvf/152Q0hOT6eoL1pTbQFramr4/PPPCQwMxGQyUV1drVUuSbIJp2oBo6OjKSsrY+bMmezZs4fFixdrlUuSbEKnV1QXram2gEFBQQQFBQEwY8YMLfJIkk051UEYSfqlcbBhQWUBSh2LbAElyY4c7VxQWYBSh6I4WBNoswJ8er93yw+yA0e+6LUg/g17R2hWyfSZ9o6gqmen1l2Q62jdELIFlDoUeRBGkuzIwS6IlwUodSwuOseaXkEWoNShtHUP9McmZzl48CCHDx/m6tWrxMTEsHfvXo4dO4bRaGTJkiUEBATYLI8kORUXnVBdWvJjk7N88803FBYWcv36dbp06UJOTg4+Pj74+vo2DV/fHFmAUofS0uRIP2dylgceeIA33niDhx56iOPHjzNz5kzi4+MJDAzk0KFDqnnkLqjUoehbaOV+zuQsW7duZePGjfj6+lJVVUV5eTlDhw7F29ubhoYG1eeTLaDUobgo6ktLfmxyllGjRrFixQoyMjKIiIhACEF8fDyZmZmMGzdOPU87vS5Jcgo6pW1HQZubnOWHnnjiiVZvTxag1KG0ppXTkixAqUORHfGSZEdO1RFfXFzM9u3bKS8vx9/fn1mzZtGtWzetsklSu3O0o46qBbhp0yZeeOEF/Pz8KC8vJzk5mT/+8Y9aZZOkdudULWBDQwMWiwWr1YrVakVvjxkMJakdOdVBmEWLFrFjxw4qKysJCAiQo6JJTq+t3RDtTXWXODs7m5KSEiIjI1m4cCG7d+/WKJYk2UZbO+LbPY/anf/6179ITk4mNTUVIQQFBQVa5ZIkm3Cqboi6ujouX75MdHQ0KSkpNp2ebIifF//VvwcA677Jx2S2MLanLyO6+9DV3Y31x84z7+6+GPQ6hvh5seyr
XPKuazdZzNAAb6aFNuZb+688qustRPTxZXRPH/yMbiR/fZ77e3UhxKcT3TwMrDx0ljqLfYfyjxhzN49PHsP85W/ZLcOp4xfYs+sIANHLpuDpZeSd/95Leel1zn1XzNPRj3DrVh2nv72Iub6B+UsjMbi72ixPS+eCak11F3TFihWYTCYAli5dyqxZs2wW5LGQHiR9fY7d+Vd4uK8/AEP9venj6U5nNxeq6syszcrj7dOF7L1YrmnxAUwN7U784bPsOnuFSf0ar++6r5s3fb2MdDa4UllrZkyvLnT3MKBXFLsXX3BgN+4NC8Ld4GbXHJ98nMnS2CgemTKCLz49DsDcBZOYvzSSocNCGDV2EBkHTqAo0NXf26bFBy1fDaE11ec8c+YMFy5c4PDhwzz//PPU19fbLogC9VbB1Vv1+BkbPzQZxddY/GUOX1y6yuCujYM8zQrtRdqZEpvlaI5eUai3CK7eqsO/U2O+g4UVLDhwmgMXrnJPgDfv55Sw9B/fUVh9i54eBs0z/tD5i6Vs2Px3u2YAsFqtuBlc6ernTcXVG03r/yftKyKjRgNQcbWa+UsjMZsbuJB/xaZ52no9YHtTLcC//e1vTJw4kU8//ZSNGzeSlZVlsyC1FiuuOgU/oxvXahsLfVZobwRQWWums8EFV52CokC1Wf0SD5vka7idz8DVm435Zt/dCwFU1JrxMbgwO6wX3M5r429yZ2Fwd6O+voFrV2/g29Wraf2Vy5UEBjee1NGtRxcURcHLuxNC2LYIXBX1RWuqvwH1ej21tbV4eze2PhaLxWZBPs67wsrhd+GiUzh8uZJ+3p04WlrFqpF3YRXwWlYegV5GLplqbZZBzY4zl4n/j7tw1ek4dKmCEJ9OZF6uIun+AViB1UfOMairJ3Gj+2MRgtxrxXbJ6Wj+87FR/GnNRzQ0WBgxZiAX8q8Q0N0Ho/H7PYSh4SFsWPsxBoMrQSHdbZrH0bohFKHylfPhhx+SmZnJiy++yL59+3Bzc2P27Nmt2nD4h+pXAtvLrVuO9R/wQ448Lmj+d44+LuijrXpc8on9qvcvG/JQe8RpNdUWcMaMGUyfPh1FUQgPD2fAgNYNfipJjsrBpohXL8DExETKysrQ6/UEBgby1ltv8cYbjvstLUktcbRzQVucojo1NZWGhgaWLFnS9FtQkpyVU01RbTKZKCoqQq/XU1RURE1NjVa5JMkm7HGkU41qC9ivXz+ioqLw8/Pjscceo0+fPlrlkiSbcNUJ1UVrqi1geXk5b7/9NqGhoTzyyCNNIwJLkrNytHNBVVtAIQSDBw/Gzc2NYcOGaZVJkmzGRae+aJ5H7c6amhqKioqAxmK8deuWJqEkyVb0DtYRr1qAgwYN4q9//WvT7YEDB9o8kCTZUluv+WvN5CwHDhwgPz8fV1dXVqxYoZ5H7c6FCxe2La0kOZi2/ga8PTnLiRMnSE9PZ/r06U2Ts5jNZrp06UJWVhbr168nNTWV8+fPExwc3HyetsWRJOfS0lHQ9picxcWlsV0LCAhoekxz5LigUofS0oGW9picRfe/E9GXlZURHh6unuenxZck59bWc0FvT85iNpsZO3bsHZOzKIrCqlWruH79OgkJCRgMBkJCQlS3JwtQ6lBcNJicZerUqa3P06Y0kuRkHK0jXhag1KHY43QzNTYrwPLL2g8b0RqFS3rZO0KzSqY77kWvIQM/sHcEVbcKW3dBrmwBJcmOnGpoekn6pVFkAUqS/chdUEmyI6c6GVuSfmkcrAGUBSh1LHIXVJLsSBagJNmRLEBJsiMHqz9ZgFLH4lQjY0vSL42jTc4iC1DqUBysAZQFKHUschdUkuxIngsqSXbkaN0QLY6KVl1djdlsJiMjg+vXr2uRSZJsRmlh0ZpqC7hmzRqsVitFRUVMnTqVNWvWsG7dOq2ySVK7c6rfgPX19SQmJrJz504efvhhDh8+bPNA4T28mTWkJwDx/8jjRl0Dc+/rzUA/T67drGfdPwuwND+rts1k
Z+eyffs+AFaufBpvb0/Wr99GaelVcnLOs3TpbAoKLpGff4nq6hqSkhbh4WHUJNup4xfYs+sIANHLpuDpZeSd/95Leel1zn1XzNPRj3DrVh2nv72Iub6B+UsjMbi7apKtORFj7ubxyWOYv/wtTZ/X0bohVHdBBw8eTEZGBlOnTuWbb76hb9++Ng80c0hPVhw4S9qpyzw6IACA8B6deemz7/i2tJqIIF+bZ/gxO3bsIzFxAVFRD5GefgiAxYufJCZmHiNGDCYiYhh9+vRg9eqFDB0aSn5+kWbZPvk4k6WxUTwyZQRffHocgLkLJjF/aSRDh4UwauwgMg6cQFGgq7+33YsvOLAb94YF4W5w0/y5FUV90ZpqAY4bN47a2lrq6+sJDw9n8ODBNg+kVxTqLFbKauoJ8Gz8D/o49wqvPRTKyF6d8TXa58NjsVgxGNzw9/elvLyyaf17733CjBmTAHjwwZFcvFhCXl4RYWH9m9tUu7NarbgZXOnq503F1RtN6/8n7Ssio0YDUHG1mvlLIzGbG7iQf0WzbD/m/MVSNmz+u12eW9/CojXVAkxKSkKv1xMXF4fJZGLnzp02D3SrwYKbXiHAw43ymnoADHody/ef4burNRRX19o8w48xGg3U15spL6/Az69L0/ri4jJCQhonLj11Ko9t2/YQF/dc0+jIWjC4u1Ff38C1qzfw7erVtP7K5UoCg7sB0K1HFxRFwcu7E8IOu/COoq0tYHZ2NsuXL2f58uXcuNH4Zbd+/XpiYmKIjIwkIyODrVu38vzzzxMTE0NZWZnq9lQ/JV5eXkycOJFly5aRnJxMXV1d61/pz/TBiRLWTgxl1pCe3DRbuMu3E656HckPhzLI35MjRVU2z/Bjpk37DXFxqaSl7cNoNJCXV4jJdBMPD3egcfq2JUuSqays5uWXN3Lu3EXNsv3nY6P405qP2LMrE3ejgQv5V7hZU4vRaGh6zNDwEDas/Zhr5TcICumuWTZHo6BTXVpye3KWqKgo0tPTAVi8eDExMTGMGDGCiIgIcnJy8PHxwdfXt2n4+mbzCJWvwz179uDp6cmECRO4dOkSMTExvPfee616oX3/dLBVj9OaQw9LePOMvSM0y/GHJfywVY+rqk9XvX/bm/mkpqbesW7hwoVN80W89NJLrFu3jgsXLrBnz56m9ampqUyaNImQkBCOHz/OPffcw44dO+jevTsRERHNPp/qUdBHH32UL774gnfffZfAwEC2bdvWqhcpSY6qpVbu50zOAlBcXNw0D8Tp06cZOnQo3t7eNDSoj4+rmmbNmjWYzWbGjx9PTU0N8fHxqhuTJEenKHrVpSW3J2dJS0vDaDSSl5eHyWTCw8Oj6TFCCOLj48nMzGTcuHGq21NtAauqqvj1r38NQHBwMBkZGa15jZLkwNrW1/Bjk7MAxMbGNv37iSeeaPX2VAtQURSEEE1HzbQ8sidJtqBzsDlpVQvw66+/ZsKECVplkSQNOFEBvvDCCxrFkCRtKIoTFeDvfvc7rXJIkiYUB7smXl4PKHUoil1OOGueLECpQ3GqXVBJ+qVpzelmWpIFKHUosgWUJLuSB2EkyW7kQRhJsiPZDSFJdtSaE661JAtQ6lDkUVBJsiPFwYbGlgUodSgKg+wd4Q6qQ1JIkmRbjrVDLEkdjCxASbIjWYCSZEeyACXJjmQBSpIdyQKUJDuSBShJdmSXjviCggI2bdpEp06d8PT0JCsri6CgIEwmEzNmzGDUqFHEx8ej1+sxm828/PLLZGRkkJmZSXV1NbGxsezdu5f8/Hyqq6tJSkq6Y2BUR8u2atUqUlJS2iUfwKZNm8jNzaVTp07odDr8/f0pKCjAw8MDDw8P4uPj+eCDDzh58iRms5nIyEgGDhzI2rVrcXNzY/To0dx7771s3LgRvV5PdHQ0ffr0abd8P/Txxx+zYcMGhg8fDkDnzp155ZVX2LNnzx3v2ZdffsnJkydxcXEhLi7OJlkckrCD999/X+zfv18IIcSB
AwfEwoULhRBClJWVidjYWJGXlyeSkpKEEELk5uaKvLw8ceDAASGEEJ999plIT09vur1lyxbx7bffipSUFLFq1SqxbNkyh8v2+OOPizVr1ohVq1YJq9XapnxCCLFx40Zx4cIFIYQQ3377rRg+fHjT7T/84Q+ioaFBREdHi+rqatHQ0CD27t0rcnNzRVFRkbBYLGLp0qUiJSVFFBYWiuLiYpGSktLmTM3ZtWuXGDt2rDCbzUII0fT+/Pt7dvDgQSGEEPHx8aKqqspmeRyNXXZBp02bRklJCYmJieTm5lJSUkJMTAxz585l3LhxhISEMGLECNasWcNHH31Ely5dePDBB6moqGD//v1MmDCBBx98kIsXL5KXl0dYWBgRERFERESQl5fncNm6d+/OypUr6d27Nzk5Oe30LjYKDQ3lt7/9LSkpKcyfPx8XFxcUReGll17iz3/+MwkJCXh6ejJw4EB69+5Namoq06ZNo7KykoCAAPz9/SkvL2/XTGoGDBhAQUHB/3nPxo8fz86dOyksLMRo1GZmYYdgj6p///33xY0bN4QQQrz99tviySefFEII0dDQIJ588klx9OhRcfToUSGEECdOnBBvvPGGKCwsFLGxseL69etCCCFOnjwpVq9eLerq6kRNTY1YsGCByM7OFs8995xDZRNCNLXKW7ZsEbm5uW3KJ8SdLeDRo0dFeHh40+2UlBSRlZUl3nzzTSGEEFarVSxcuFBYLBaRnJwsMjMzm7ZRVFQkiouLxbp169qcqTn/3gLu3r1b7N+///+8Z7ff03379okvvvjCZnkcjV3OBT179izr16/H09MTnU5Hfn4+QUFBWCwWQkJCmDNnDqtWrUKn01FbW8vixYt55ZVX6Nq1K25ubkRFRREXF8fgwYNRFIV58+bx+uuv07dvX44ePcrWrVvx9vZ2iGzPPvss8fHx3H///dy4cYPly5e3+f27/Rvwdkvh7+9PYWEhnp6e3Lhxg3Xr1rFr1y6OHTuGTqdj2LBhGAwG3n//fUJCQujXrx9Tpkxhw4YNWCwWli1bRvfutpkz8N9/A3p4eLBq1Sqeeuqppvds6tSpnDp1isLCQkwmE7GxsXTp0qWFLf8yyJOxJcmOZDeEJNmRLEBJsiNZgJJkR7IAJcmOZAFKkh3JApQkO5IFKEl29P8BpvVTVh9Qd2MAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 252x216 with 2 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "if analysis=='COSMIC':\n",
    "    AA_signatures = ['SBS22_CI','SBS1536I_CI','DBS78D_CI','ID83C_CI']\n",
    "    corrMatrix = merged_sigs_with_metadata[AA_signatures].corr()\n",
    "    corrMatrix.rename({'SBS22_CI':'SBS22a','SBS1536I_CI':'SBS22b','DBS78D_CI':'DBS20','ID83C_CI':'ID23'}, axis=1, inplace=True)\n",
    "    corrMatrix.rename({'SBS22_CI':'SBS22a','SBS1536I_CI':'SBS22b','DBS78D_CI':'DBS20','ID83C_CI':'ID23'}, axis=0, inplace=True)\n",
    "    # corrMatrix = merged_sigs_with_metadata[merged_sigs_with_metadata['country']!='Serbia'][AA_signatures].corr()\n",
    "    import seaborn as sns\n",
    "    import matplotlib\n",
    "    import matplotlib.pyplot as plt\n",
    "    matplotlib.rcParams['pdf.fonttype'] = 42\n",
    "    matplotlib.rcParams['ps.fonttype'] = 42\n",
    "    sns.set(rc={'figure.figsize':(3.5,3)})\n",
    "    sns.set_context(\"paper\", rc={\"font.size\":7,\"axes.titlesize\":7,\"axes.labelsize\":7,\"xtick.labelsize\":7,\"ytick.labelsize\":7,\"legend.fontsize\":7})\n",
    "    svm = sns.heatmap(corrMatrix, annot=True, cmap=\"YlGnBu\")\n",
    "    plt.savefig(\"/Users/senkins/work/RCC_manuscript/Figures/Ext_Fig5_AA_correlations.pdf\")\n",
    "AA_counts = {}\n",
    "for country in data_merged.country.unique():\n",
    "    merged_sigs_localised = merged_sigs_with_metadata[merged_sigs_with_metadata.country==country]\n",
    "    merged_sigs_localised = merged_sigs_localised[AA_signatures]\n",
    "    SBS_signatures = [sig for sig in AA_signatures if 'SBS' in sig]\n",
    "    if len(SBS_signatures)>0:\n",
    "        merged_SBS_sigs_localised = merged_sigs_localised[SBS_signatures]\n",
    "        Any_SBS_AA_count = len(merged_SBS_sigs_localised.loc[~(merged_SBS_sigs_localised==0).all(axis=1)])\n",
    "    AA_count = merged_sigs_localised.astype(bool).sum(axis=0)\n",
    "    Any_AA_count = len(merged_sigs_localised.loc[~(merged_sigs_localised==0).all(axis=1)])\n",
    "    AA_count.loc['All cases'] = len(merged_sigs_localised)\n",
    "    if len(SBS_signatures)>0:\n",
    "        column_name = ' or '.join(SBS_signatures) + ' (%)'\n",
    "        column_name = column_name.replace('_CI','')\n",
    "        AA_count.loc[column_name] = '%i (%.1f)' % (Any_SBS_AA_count, 100*Any_SBS_AA_count/AA_count.loc['All cases'])\n",
    "    AA_count.loc['Any (%)'] = '%i (%.1f)' % (Any_AA_count, 100*Any_AA_count/AA_count.loc['All cases'])\n",
    "    for signature in AA_signatures:\n",
    "        N_AA = int(AA_count.loc[signature])\n",
    "        AA_count.loc[signature] = '%i (%.1f)' % (N_AA, 100*N_AA/AA_count.loc['All cases'])\n",
    "        if 'SBS22' in signature:\n",
    "            AA_count.loc['SBS22'] = N_AA\n",
    "    AA_counts[country.replace(' ','')] = AA_count\n",
    "AA_counts_dataframe = pd.DataFrame(AA_counts).T.sort_values(by=['SBS22'], ascending=False)\n",
    "N_cases = AA_counts_dataframe['All cases']\n",
    "AA_counts_dataframe.drop(labels=['All cases', 'SBS22'], axis=1,inplace = True)\n",
    "AA_counts_dataframe.insert(0, 'N cases', N_cases)\n",
    "AA_counts_dataframe.rename({'SBS22_CI':'SBS22 (%)','SBS1536I_CI':'SBS1536I (%)','DBS78D_CI':'DBS78D (%)','ID83C_CI':'ID83C (%)'}, axis=1, inplace=True)\n",
    "print(AA_counts_dataframe)\n",
    "AA_counts_dataframe.to_csv('/Users/senkins/work/RCC_manuscript/Figures and Tables/Count_tables/AA_counts.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add PFAS data\n",
    "PFAS = pd.read_csv('./PFAS/pfas_rcc_manuscript.csv', index_col='donor_id')\n",
    "PFAS_variables = [analyte for analyte in PFAS.columns if 'PF' in analyte and not 'omm' in analyte and not analyte in ['PFOA', 'PFOS']]\n",
    "PFAS = PFAS[PFAS_variables]\n",
    "LOQ_table = pd.read_excel('./PFAS/LOQ_table_renamed.xlsx', index_col=0)\n",
    "# apply LOQ limits (The standard practice for handling a measurement below the LOQ is to assign it the value of the LOQ/2.):\n",
    "for analyte in PFAS_variables:\n",
    "    LOQ = LOQ_table.loc[analyte].to_numpy()[0]\n",
    "    PFAS[analyte].values[PFAS[analyte].values < LOQ] = LOQ/2\n",
    "PFAS['PFOA'] = PFAS['BrPFOAngmL'] + PFAS['LPFOAngmL']\n",
    "PFAS['PFOS'] = PFAS['BrPFOSngmL'] + PFAS['LPFOSngmL']\n",
    "PFAS_variables = ['PFOA', 'PFOS']  #PFAS_variables.extend(['PFOA', 'PFOS']) \n",
    "PFAS = PFAS[PFAS_variables]\n",
    "merged_sigs_with_metadata = pd.concat([PFAS, merged_sigs_with_metadata], axis=1)\n",
    "# # create quantiles\n",
    "def jitter(a_series, noise_reduction=1000000):\n",
    "    return (np.random.random(len(a_series))*a_series.std()/noise_reduction)-(a_series.std()/(2*noise_reduction))\n",
    "PFAS_variables_quantiles = []\n",
    "for analyte in PFAS_variables:\n",
    "    analyte_q_name = analyte.split(' ', 1)[0] + '_q'\n",
    "    PFAS_variables_quantiles.append(analyte_q_name)\n",
    "    merged_sigs_with_metadata[analyte_q_name] = pd.qcut(merged_sigs_with_metadata[analyte] + jitter(merged_sigs_with_metadata[analyte]), 4, labels=['Q1','Q2','Q3','Q4'])\n",
    "# add recruiment year info needed as a covariate for PFAS\n",
    "recruitment_year = pd.read_csv('metadata/extra/MUT_RCC_recruitmentyear.csv', index_col=2)\n",
    "merged_sigs_with_metadata = pd.concat([recruitment_year['dor_yr'].dropna(), merged_sigs_with_metadata], axis=1)\n",
    "# Create year of recruitment groups\n",
    "bins = [1998,2005,2010,2015,9999]\n",
    "labels = ['1999-2005','2005-2010','2010-2015','2015+']\n",
    "merged_sigs_with_metadata['rec_period'] = pd.cut(merged_sigs_with_metadata['dor_yr'], bins=bins, labels=labels, right=True)\n",
    "# qgrid.show_grid(merged_sigs_with_metadata, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 100})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create age groups\n",
    "bins = [0,45,55,65,75,999]\n",
    "labels = ['0-45','45-55','55-65','65-75','75+']\n",
    "merged_sigs_with_metadata['age_group'] = pd.cut(merged_sigs_with_metadata['age_diag'], bins=bins, labels=labels, right=True)\n",
    "# qgrid.show_grid(merged_sigs_with_metadata, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 100})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create BMI groups\n",
    "merged_sigs_with_metadata['bmi'] = pd.to_numeric(merged_sigs_with_metadata['bmi'], errors='coerce')\n",
    "# merged_sigs_with_metadata['bmi_q'] = pd.qcut(merged_sigs_with_metadata['bmi'], 4, labels=['Q1','Q2','Q3','Q4'])\n",
    "# # <23.2 [23.2, 25.6], [25.8, 30.4] >30.5\n",
    "bmi_bins = [0,23,25,30,999]\n",
    "bmi_labels = ['0-23','23-25','25-30','>30']\n",
    "merged_sigs_with_metadata['bmi_q'] = pd.cut(merged_sigs_with_metadata['bmi'], bins=bmi_bins, labels=bmi_labels, right=True)\n",
    "merged_sigs_with_metadata['bmi_m'] = pd.qcut(merged_sigs_with_metadata['bmi'], 2, labels=['<median','>=median'])\n",
    "obesity_bmi_threshold = 30\n",
    "merged_sigs_with_metadata['obesity'] = False\n",
    "merged_sigs_with_metadata.loc[merged_sigs_with_metadata['bmi'] > obesity_bmi_threshold, 'obesity'] = True\n",
    "# qgrid.show_grid(merged_sigs_with_metadata, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 100})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create ever-variable for alcohol and tobacco\n",
    "merged_sigs_with_metadata['alcohol_ever'] = \"No\"\n",
    "merged_sigs_with_metadata.loc[merged_sigs_with_metadata['alcohol'] != 'Never', 'alcohol_ever'] = \"Yes\"\n",
    "merged_sigs_with_metadata.loc[merged_sigs_with_metadata['alcohol'] == \"Don't know\", 'alcohol_ever'] = np.nan\n",
    "merged_sigs_with_metadata['tobacco_ever'] = \"No\"\n",
    "merged_sigs_with_metadata.loc[merged_sigs_with_metadata['tobacco'] != 'Never', 'tobacco_ever'] = \"Yes\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# print(list(merged_sigs_with_metadata[merged_sigs_with_metadata['tobacco_ever']=='No'].index))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# signatures and parameters to consider\n",
    "signatures = []\n",
    "for mutation_type in mutation_types:\n",
    "    columns_to_scan = merged_sigs_with_metadata.columns \n",
    "    if mutation_type == 'CNV' and analysis=='COSMIC':\n",
    "        mutation_type = 'CN'\n",
    "    if 'SBS' in mutation_type:\n",
    "        # SP output idiosyncrasy\n",
    "        columns_to_scan = [ col for col in columns_to_scan if \"CNV\" not in col ]\n",
    "        columns_to_scan = [ col for col in columns_to_scan if \"SV\" not in col ]\n",
    "    signatures += [col for col in columns_to_scan \n",
    "                   if mutation_type in col and not '_abs' in col and not 'burden' in col\n",
    "                   and not '_CI' in col and not '_bool' in col]\n",
    "# print(len(signatures), signatures)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "for mutation_type in mutation_types:\n",
    "    merged_sigs_with_metadata[mutation_type + '_burden_bool'] = pd.qcut(merged_sigs_with_metadata[mutation_type + '_burden'], 2, labels=[0,1])\n",
    "    merged_sigs_with_metadata[mutation_type + '_burden_bool'] = merged_sigs_with_metadata[mutation_type + '_burden_bool'].astype('float32')\n",
    "# qgrid.show_grid(merged_sigs_with_metadata, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 100})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# drop Serbian hypermutator\n",
    "if 'SBS' in mutation_types and 'PD47592a' in merged_sigs_with_metadata.index and drop_Serbian_hypermutator:\n",
    "    merged_sigs_with_metadata.drop('PD47592a', axis=0, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "961\n"
     ]
    }
   ],
   "source": [
    "print(len(merged_sigs_with_metadata))\n",
    "# merged_sigs_with_metadata[mutation_type + '_burden_bool'].to_csv('~/Desktop/RCC_burden_median.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using below/above median model for signature SBS1, its frequency is 0.77\n",
      "Using below/above median model for signature SBS1536A, its frequency is 0.86\n",
      "Using below/above median model for signature SBS1536B, its frequency is 0.90\n",
      "Using below/above median model for signature ID1, its frequency is 0.85\n",
      "Using below/above median model for signature ID5, its frequency is 0.93\n",
      "Running logistic regression with parameter age_group, signature SBS1\n",
      "Zero counts for signature SBS1: 481\n",
      "All counts for signature SBS1: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1261.1332 \n",
      "Link Function:         Logit             BIC:             -5270.8385\n",
      "Dependent Variable:    SBS1_bool         Log-Likelihood:  -616.57   \n",
      "Date:                  2024-02-08 17:21  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1233.1    \n",
      "Df Model:              13                Pearson chi2:    959.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.9710   0.2209 -4.3948 0.0000 -1.4040 -0.5380\n",
      "sex[T.Male]          -0.0397   0.1451 -0.2732 0.7847 -0.3241  0.2448\n",
      "country[T.Brazil]     0.1257   0.2476  0.5080 0.6115 -0.3595  0.6110\n",
      "country[T.Canada]     0.7523   0.2857  2.6332 0.0085  0.1923  1.3123\n",
      "country[T.Japan]      0.2846   0.3737  0.7615 0.4463 -0.4478  1.0170\n",
      "country[T.Lithuania]  0.4754   0.5427  0.8759 0.3811 -0.5883  1.5391\n",
      "country[T.Poland]    -0.5277   0.6304 -0.8371 0.4025 -1.7633  0.7078\n",
      "country[T.Romania]   -1.0465   0.3173 -3.2978 0.0010 -1.6684 -0.4245\n",
      "country[T.Russia]     0.4980   0.1954  2.5491 0.0108  0.1151  0.8809\n",
      "country[T.Serbia]    -0.7507   0.3071 -2.4444 0.0145 -1.3526 -0.1488\n",
      "country[T.Thailand]   0.1631   0.9369  0.1741 0.8618 -1.6733  1.9995\n",
      "country[T.UK]         0.4062   0.2338  1.7370 0.0824 -0.0521  0.8645\n",
      "tobacco_ever[T.Yes]  -0.0132   0.1443 -0.0918 0.9269 -0.2961  0.2696\n",
      "age_group             0.4416   0.0635  6.9582 0.0000  0.3172  0.5660\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.245601  0.583940  0.378703  1.108950e-05    3.093971e-03\n",
      "sex[T.Male]           0.723148  1.277402  0.961120  7.846898e-01    1.000000e+00\n",
      "country[T.Brazil]     0.698058  1.842186  1.133998  6.114804e-01    1.000000e+00\n",
      "country[T.Canada]     1.212095  3.714866  2.121973  8.458929e-03    1.000000e+00\n",
      "country[T.Japan]      0.639006  2.764914  1.329209  4.463316e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.555246  4.660370  1.608617  3.810815e-01    1.000000e+00\n",
      "country[T.Poland]     0.171483  2.029557  0.589945  4.025131e-01    1.000000e+00\n",
      "country[T.Romania]    0.188550  0.654086  0.351180  9.745417e-04    2.718971e-01\n",
      "country[T.Russia]     1.121984  2.413139  1.645449  1.080012e-02    1.000000e+00\n",
      "country[T.Serbia]     0.258574  0.861775  0.472052  1.451066e-02    1.000000e+00\n",
      "country[T.Thailand]   0.187634  7.385229  1.177165  8.617973e-01    1.000000e+00\n",
      "country[T.UK]         0.949202  2.373826  1.501080  8.238373e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.743735  1.309426  0.986846  9.268897e-01    1.000000e+00\n",
      "age_group             1.373265  1.761123  1.555149  3.445975e-12    9.614271e-10\n",
      "Running logistic regression with parameter age_group, signature SBS2\n",
      "** Warning: Covariate country, sig SBS2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter age_group, sig SBS2, perfect or near-perfect separation for category 0. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS2: 955\n",
      "All counts for signature SBS2: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%       97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.252354    7.165139  1.202918  5.572848e-01        1.000000\n",
      "country[T.Brazil]     0.003978    6.902463  0.551651  4.528026e-01        1.000000\n",
      "country[T.Canada]     0.004983    8.790606  0.693510  7.528573e-01        1.000000\n",
      "country[T.Japan]      0.009643   17.752792  1.357391  5.292038e-01        1.000000\n",
      "country[T.Lithuania]  0.022258   41.083299  3.130414  3.621502e-01        1.000000\n",
      "country[T.Poland]     0.027188   52.420407  3.855471  3.335520e-01        1.000000\n",
      "country[T.Romania]    0.005770    9.978882  0.799465  5.178005e-01        1.000000\n",
      "country[T.Russia]     0.177544    8.351672  1.213727  5.387204e-01        1.000000\n",
      "country[T.Serbia]     0.005567    9.879966  0.776013  5.380777e-01        1.000000\n",
      "country[T.Thailand]   0.060409  148.684751  9.072362  2.065472e-01        1.000000\n",
      "country[T.UK]         0.343534   14.921274  2.265973  2.726580e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.191248    4.822484  0.945485  5.254771e-01        1.000000\n",
      "age_group             0.505794    2.116040  1.019687  5.550151e-01        1.000000\n",
      "Intercept             0.000641    0.075838  0.009654  3.157254e-08        0.000009\n",
      "Running logistic regression with parameter age_group, signature SBS4\n",
      "Zero counts for signature SBS4: 410\n",
      "All counts for signature SBS4: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1253.1215 \n",
      "Link Function:         Logit             BIC:             -5278.8502\n",
      "Dependent Variable:    SBS4_bool         Log-Likelihood:  -612.56   \n",
      "Date:                  2024-02-08 17:21  LL-Null:         -655.73   \n",
      "No. Observations:      961               Deviance:        1225.1    \n",
      "Df Model:              13                Pearson chi2:    956.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.2898   0.2182 -1.3279 0.1842 -0.7174  0.1379\n",
      "sex[T.Male]          -0.2000   0.1457 -1.3726 0.1699 -0.4855  0.0856\n",
      "country[T.Brazil]    -0.6365   0.2512 -2.5340 0.0113 -1.1288 -0.1442\n",
      "country[T.Canada]    -0.5623   0.2772 -2.0285 0.0425 -1.1056 -0.0190\n",
      "country[T.Japan]      0.8305   0.4738  1.7530 0.0796 -0.0980  1.7590\n",
      "country[T.Lithuania] -0.1917   0.5429 -0.3531 0.7240 -1.2558  0.8724\n",
      "country[T.Poland]     0.7214   0.6908  1.0443 0.2964 -0.6326  2.0754\n",
      "country[T.Romania]   -0.9480   0.2913 -3.2545 0.0011 -1.5190 -0.3771\n",
      "country[T.Russia]    -0.3595   0.1957 -1.8364 0.0663 -0.7431  0.0242\n",
      "country[T.Serbia]    -0.5464   0.2831 -1.9302 0.0536 -1.1013  0.0084\n",
      "country[T.Thailand]  -0.3463   0.9311 -0.3719 0.7099 -2.1711  1.4786\n",
      "country[T.UK]        -0.0538   0.2434 -0.2211 0.8250 -0.5308  0.4232\n",
      "tobacco_ever[T.Yes]   0.6616   0.1453  4.5541 0.0000  0.3769  0.9463\n",
      "age_group             0.3406   0.0626  5.4412 0.0000  0.2179  0.4633\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.488003  1.147892  0.748448  1.842192e-01        1.000000\n",
      "sex[T.Male]           0.615366  1.089350  0.818748  1.698855e-01        1.000000\n",
      "country[T.Brazil]     0.323418  0.865727  0.529142  1.127735e-02        1.000000\n",
      "country[T.Canada]     0.331017  0.981188  0.569904  4.251165e-02        1.000000\n",
      "country[T.Japan]      0.906620  5.806870  2.294477  7.959572e-02        1.000000\n",
      "country[T.Lithuania]  0.284835  2.392621  0.825532  7.239855e-01        1.000000\n",
      "country[T.Poland]     0.531202  7.968059  2.057341  2.963683e-01        1.000000\n",
      "country[T.Romania]    0.218935  0.685851  0.387501  1.136001e-03        0.316944\n",
      "country[T.Russia]     0.475620  1.024477  0.698042  6.629485e-02        1.000000\n",
      "country[T.Serbia]     0.332439  1.008465  0.579010  5.358327e-02        1.000000\n",
      "country[T.Thailand]   0.114050  4.386586  0.707313  7.099492e-01        1.000000\n",
      "country[T.UK]         0.588115  1.526834  0.947604  8.249923e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.457702  2.576233  1.937881  5.261225e-06        0.001468\n",
      "age_group             1.243511  1.589371  1.405845  5.291094e-08        0.000015\n",
      "Running logistic regression with parameter age_group, signature SBS5\n",
      "Zero counts for signature SBS5: 884\n",
      "All counts for signature SBS5: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             510.2984  \n",
      "Link Function:         Logit             BIC:             -6021.6734\n",
      "Dependent Variable:    SBS5_bool         Log-Likelihood:  -241.15   \n",
      "Date:                  2024-02-08 17:21  LL-Null:         -268.19   \n",
      "No. Observations:      961               Deviance:        482.30    \n",
      "Df Model:              13                Pearson chi2:    941.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -3.6946   0.4520 -8.1732 0.0000 -4.5806 -2.8086\n",
      "sex[T.Male]          -0.1023   0.2673 -0.3829 0.7018 -0.6261  0.4215\n",
      "country[T.Brazil]     0.2327   0.3786  0.6146 0.5388 -0.5093  0.9747\n",
      "country[T.Canada]    -0.6398   0.5130 -1.2470 0.2124 -1.6453  0.3658\n",
      "country[T.Japan]      0.1984   0.5090  0.3897 0.6968 -0.7993  1.1960\n",
      "country[T.Lithuania]  0.5316   0.6869  0.7739 0.4390 -0.8148  1.8780\n",
      "country[T.Poland]    -0.2157   1.0868 -0.1985 0.8427 -2.3458  1.9143\n",
      "country[T.Romania]   -1.4158   0.7513 -1.8843 0.0595 -2.8884  0.0568\n",
      "country[T.Russia]    -1.1240   0.4690 -2.3966 0.0165 -2.0432 -0.2048\n",
      "country[T.Serbia]    -0.4719   0.5621 -0.8395 0.4012 -1.5736  0.6298\n",
      "country[T.Thailand]   0.4677   1.1586  0.4036 0.6865 -1.8032  2.7385\n",
      "country[T.UK]        -0.6812   0.4275 -1.5932 0.1111 -1.5191  0.1568\n",
      "tobacco_ever[T.Yes]   0.4243   0.2694  1.5750 0.1153 -0.1037  0.9523\n",
      "age_group             0.6168   0.1244  4.9594 0.0000  0.3730  0.8606\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.010249   0.060289  0.024858  3.003636e-16    8.380144e-14\n",
      "sex[T.Male]           0.534654   1.524229  0.902738  7.018188e-01    1.000000e+00\n",
      "country[T.Brazil]     0.600891   2.650256  1.261949  5.388485e-01    1.000000e+00\n",
      "country[T.Canada]     0.192948   1.441634  0.527409  2.123920e-01    1.000000e+00\n",
      "country[T.Japan]      0.449657   3.306817  1.219399  6.967587e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.442738   6.540428  1.701674  4.390034e-01    1.000000e+00\n",
      "country[T.Poland]     0.095771   6.782426  0.805954  8.426530e-01    1.000000e+00\n",
      "country[T.Romania]    0.055667   1.058494  0.242742  5.952396e-02    1.000000e+00\n",
      "country[T.Russia]     0.129612   0.814824  0.324979  1.654771e-02    1.000000e+00\n",
      "country[T.Serbia]     0.207306   1.877208  0.623824  4.011752e-01    1.000000e+00\n",
      "country[T.Thailand]   0.164774  15.463515  1.596243  6.864825e-01    1.000000e+00\n",
      "country[T.UK]         0.218902   1.169773  0.506030  1.111166e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.901474   2.591741  1.528524  1.152672e-01    1.000000e+00\n",
      "age_group             1.452148   2.364508  1.853002  7.070953e-07    1.972796e-04\n",
      "Running logistic regression with parameter age_group, signature SBS12\n",
      "** Warning: Covariate country, sig SBS12, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS12: 916\n",
      "All counts for signature SBS12: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.465071    2.524265    1.070095  7.940261e-01    1.000000e+00\n",
      "country[T.Brazil]      0.494468    9.464414    2.254001  2.615671e-01    1.000000e+00\n",
      "country[T.Canada]      1.311769   18.515799    4.800254  1.858320e-02    1.000000e+00\n",
      "country[T.Japan]      46.327813  528.814534  140.847779  1.993291e-24    5.561282e-22\n",
      "country[T.Lithuania]   0.011910   16.231686    1.607199  6.665923e-01    1.000000e+00\n",
      "country[T.Poland]      0.018979   26.673070    2.573986  5.215870e-01    1.000000e+00\n",
      "country[T.Romania]     0.370223   10.016911    2.165140  3.378470e-01    1.000000e+00\n",
      "country[T.Russia]      0.122448    3.327164    0.717194  6.233952e-01    1.000000e+00\n",
      "country[T.Serbia]      0.135009    7.588342    1.366899  6.706650e-01    1.000000e+00\n",
      "country[T.Thailand]    0.029118   49.255718    4.096510  4.073014e-01    1.000000e+00\n",
      "country[T.UK]          0.220563    5.914847    1.284845  6.840423e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.309222    1.655433    0.723576  4.165609e-01    1.000000e+00\n",
      "age_group              0.888536    1.747332    1.238523  2.009172e-01    1.000000e+00\n",
      "Intercept              0.002885    0.044899    0.012657  2.796214e-16    7.801438e-14\n",
      "Running logistic regression with parameter age_group, signature SBS13\n",
      "** Warning: Covariate country, sig SBS13, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS13: 807\n",
      "All counts for signature SBS13: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.127287  2.462790  1.655735  9.794606e-03    1.000000e+00\n",
      "country[T.Brazil]     0.418998  1.593755  0.839764  5.878339e-01    1.000000e+00\n",
      "country[T.Canada]     0.392739  1.650584  0.832803  6.096553e-01    1.000000e+00\n",
      "country[T.Japan]      0.115561  1.246694  0.442444  1.301198e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.343460  4.304409  1.381843  6.016801e-01    1.000000e+00\n",
      "country[T.Poland]     0.001402  1.417840  0.180827  1.218343e-01    1.000000e+00\n",
      "country[T.Romania]    0.039471  0.602227  0.195700  2.395126e-03    6.682403e-01\n",
      "country[T.Russia]     0.945758  2.429837  1.513600  8.343334e-02    1.000000e+00\n",
      "country[T.Serbia]     0.311937  1.552226  0.732883  4.265373e-01    1.000000e+00\n",
      "country[T.Thailand]   0.004099  5.088425  0.545066  6.443175e-01    1.000000e+00\n",
      "country[T.UK]         0.675298  2.080448  1.196862  5.236542e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.836646  1.749752  1.208187  3.080497e-01    1.000000e+00\n",
      "age_group             0.912975  1.257242  1.070513  3.967870e-01    1.000000e+00\n",
      "Intercept             0.063442  0.205292  0.115804  5.122414e-15    1.429154e-12\n",
      "Running logistic regression with parameter age_group, signature SBS18\n",
      "** Warning: Covariate country, sig SBS18, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS18: 890\n",
      "All counts for signature SBS18: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.032579   3.080587  1.753911  3.653721e-02    1.000000e+00\n",
      "country[T.Brazil]     0.394458   2.113555  0.953939  7.828960e-01    1.000000e+00\n",
      "country[T.Canada]     0.206497   1.769045  0.673768  4.435272e-01    1.000000e+00\n",
      "country[T.Japan]      0.151010   2.572015  0.775389  6.623496e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.375509   7.239441  2.005130  3.513936e-01    1.000000e+00\n",
      "country[T.Poland]     0.119365   5.167568  1.139520  7.961894e-01    1.000000e+00\n",
      "country[T.Romania]    0.327200   2.382710  0.958571  7.943421e-01    1.000000e+00\n",
      "country[T.Russia]     0.294839   1.231669  0.613787  1.660166e-01    1.000000e+00\n",
      "country[T.Serbia]     0.282644   2.070118  0.830477  6.614248e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009301  11.929475  1.243455  7.727888e-01    1.000000e+00\n",
      "country[T.UK]         0.350881   1.850024  0.842125  6.291397e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.453154   1.236685  0.750254  2.494328e-01    1.000000e+00\n",
      "age_group             0.668749   1.027963  0.829600  8.462441e-02    1.000000e+00\n",
      "Intercept             0.052823   0.235230  0.114568  3.080887e-10    8.595675e-08\n",
      "Running logistic regression with parameter age_group, signature SBS21\n",
      "** Warning: Covariate country, sig SBS21, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter age_group, sig SBS21, perfect or near-perfect separation for category 1. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS21: 956\n",
      "All counts for signature SBS21: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.171399      5.234059   0.884011  5.874751e-01        1.000000\n",
      "country[T.Brazil]     0.013726    474.702256   2.552817  3.773421e-01        1.000000\n",
      "country[T.Canada]     0.017881    621.014926   3.332013  5.517960e-01        1.000000\n",
      "country[T.Japan]      0.039746   1393.834621   7.442002  2.583965e-01        1.000000\n",
      "country[T.Lithuania]  0.090397   3227.197637  17.071964  1.629245e-01        1.000000\n",
      "country[T.Poland]     0.077872   2797.072690  14.762668  1.812838e-01        1.000000\n",
      "country[T.Romania]    1.720567   3002.094610  21.612695  1.135188e-02        1.000000\n",
      "country[T.Russia]     0.159592    456.700551   3.087434  3.252014e-01        1.000000\n",
      "country[T.Serbia]     0.017082    596.315789   3.191987  3.839307e-01        1.000000\n",
      "country[T.Thailand]   0.295495  12443.897451  60.439229  8.020075e-02        1.000000\n",
      "country[T.UK]         0.929098   1625.646851  11.703370  3.946253e-02        1.000000\n",
      "tobacco_ever[T.Yes]   0.256798      8.344817   1.351663  5.023740e-01        1.000000\n",
      "age_group             0.316170      1.486701   0.695730  2.719397e-01        1.000000\n",
      "Intercept             0.000025      0.054072   0.004006  1.798504e-08        0.000005\n",
      "Running logistic regression with parameter age_group, signature SBS22\n",
      "** Warning: Covariate country, sig SBS22, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS22: 890\n",
      "All counts for signature SBS22: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                            2.5%        97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]             0.288149     1.254628    0.605967  1.665145e-01    1.000000e+00\n",
      "country[T.Brazil]       1.161096    76.379659    7.267801  3.169420e-02    1.000000e+00\n",
      "country[T.Canada]       0.844084    73.194481    6.543899  6.962839e-02    1.000000e+00\n",
      "country[T.Japan]        0.014619    42.311241    2.168091  5.172770e-01    1.000000e+00\n",
      "country[T.Lithuania]    0.027413    82.647569    4.115478  3.737048e-01    1.000000e+00\n",
      "country[T.Poland]       0.061841   195.494053    9.407364  2.245085e-01    1.000000e+00\n",
      "country[T.Romania]    128.038297  5176.267656  547.352269  6.129645e-40    1.710171e-37\n",
      "country[T.Russia]       0.003760    10.545667    0.552633  6.127877e-01    1.000000e+00\n",
      "country[T.Serbia]      18.195928   737.528686   78.406690  8.139541e-13    2.270932e-10\n",
      "country[T.Thailand]    18.368925  2532.473535  169.445899  9.379369e-06    2.616844e-03\n",
      "country[T.UK]           0.174731    27.511843    2.192677  4.348079e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]     0.456179     2.069096    0.966920  7.098424e-01    1.000000e+00\n",
      "age_group               1.418144     2.808220    1.963569  2.440326e-05    6.808509e-03\n",
      "Intercept               0.000132     0.007934    0.001449  1.971007e-27    5.499109e-25\n",
      "Running logistic regression with parameter age_group, signature SBS44\n",
      "** Warning: Covariate country, sig SBS44, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter age_group, sig SBS44, perfect or near-perfect separation for category 1. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS44: 955\n",
      "All counts for signature SBS44: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.245220      6.202517   1.106275  5.894203e-01    1.000000e+00\n",
      "country[T.Brazil]     0.435796   1223.163842   8.319516  1.066432e-01    1.000000e+00\n",
      "country[T.Canada]     0.016995    587.167274   3.158867  5.702954e-01    1.000000e+00\n",
      "country[T.Japan]      0.036531   1277.975854   6.832235  2.733098e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.093085   3313.692073  17.552693  1.587576e-01    1.000000e+00\n",
      "country[T.Poland]     0.079940   2863.346515  15.132694  1.793597e-01    1.000000e+00\n",
      "country[T.Romania]    1.792936   3129.350824  22.524720  1.058021e-02    1.000000e+00\n",
      "country[T.Russia]     0.175472    500.611598   3.388023  3.080005e-01    1.000000e+00\n",
      "country[T.Serbia]     0.018077    630.899722   3.377359  3.863663e-01    1.000000e+00\n",
      "country[T.Thailand]   0.333037  13683.059176  67.110527  7.535508e-02    1.000000e+00\n",
      "country[T.UK]         0.906548   1583.909586  11.404388  4.196309e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.388738     10.214761   1.781457  3.406797e-01    1.000000e+00\n",
      "age_group             0.365681      1.498940   0.747648  3.118027e-01    1.000000e+00\n",
      "Intercept             0.000016      0.034502   0.002519  4.743965e-10    1.323566e-07\n",
      "Running logistic regression with parameter age_group, signature SBS1536A\n",
      "** Warning: Covariate country, sig SBS1536A, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536A: 481\n",
      "All counts for signature SBS1536A: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.345606  2.466747  1.818567  9.344643e-05    2.607156e-02\n",
      "country[T.Brazil]     0.179310  0.508878  0.303622  5.391794e-06    1.504310e-03\n",
      "country[T.Canada]     0.154877  0.489634  0.276796  9.713678e-06    2.710116e-03\n",
      "country[T.Japan]      0.026821  0.157798  0.067983  4.298529e-11    1.199290e-08\n",
      "country[T.Lithuania]  0.379517  4.021723  1.137340  8.201213e-01    1.000000e+00\n",
      "country[T.Poland]     0.304948  3.450321  0.990095  9.246937e-01    1.000000e+00\n",
      "country[T.Romania]    0.207614  0.694228  0.380429  1.672504e-03    4.666286e-01\n",
      "country[T.Russia]     0.207702  0.470869  0.313787  1.610551e-08    4.493438e-06\n",
      "country[T.Serbia]     0.277056  0.890186  0.497588  1.872183e-02    1.000000e+00\n",
      "country[T.Thailand]   0.000200  0.255256  0.026803  5.411766e-04    1.509883e-01\n",
      "country[T.UK]         0.274757  0.726662  0.447403  1.160838e-03    3.238738e-01\n",
      "tobacco_ever[T.Yes]   0.813475  1.474020  1.094876  5.416542e-01    1.000000e+00\n",
      "age_group             1.762183  2.326847  2.019679  7.781277e-27    2.170976e-24\n",
      "Intercept             0.233780  0.584918  0.371289  1.724335e-05    4.810895e-03\n",
      "Running logistic regression with parameter age_group, signature SBS1536B\n",
      "Zero counts for signature SBS1536B: 481\n",
      "All counts for signature SBS1536B: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1266.0601 \n",
      "Link Function:         Logit             BIC:             -5265.9117\n",
      "Dependent Variable:    SBS1536B_bool     Log-Likelihood:  -619.03   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1238.1    \n",
      "Df Model:              13                Pearson chi2:    961.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.1719   0.2241 -5.2287 0.0000 -1.6112 -0.7326\n",
      "sex[T.Male]          -0.0747   0.1449 -0.5154 0.6063 -0.3586  0.2093\n",
      "country[T.Brazil]    -0.6812   0.2610 -2.6097 0.0091 -1.1928 -0.1696\n",
      "country[T.Canada]    -0.5143   0.2802 -1.8359 0.0664 -1.0634  0.0348\n",
      "country[T.Japan]     -0.4021   0.3740 -1.0751 0.2823 -1.1352  0.3310\n",
      "country[T.Lithuania] -0.1763   0.5296 -0.3330 0.7391 -1.2143  0.8616\n",
      "country[T.Poland]    -0.3266   0.6127 -0.5331 0.5940 -1.5275  0.8742\n",
      "country[T.Romania]    0.1992   0.2921  0.6821 0.4952 -0.3732  0.7717\n",
      "country[T.Russia]     0.4117   0.1968  2.0915 0.0365  0.0259  0.7974\n",
      "country[T.Serbia]     0.1821   0.2860  0.6366 0.5244 -0.3784  0.7426\n",
      "country[T.Thailand]  -0.7464   0.9380 -0.7957 0.4262 -2.5848  1.0921\n",
      "country[T.UK]         0.2838   0.2371  1.1970 0.2313 -0.1809  0.7484\n",
      "tobacco_ever[T.Yes]   0.3441   0.1447  2.3781 0.0174  0.0605  0.6277\n",
      "age_group             0.5173   0.0644  8.0272 0.0000  0.3910  0.6436\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.199651  0.480651  0.309778  1.707273e-07    4.763291e-05\n",
      "sex[T.Male]           0.698665  1.232758  0.928055  6.062520e-01    1.000000e+00\n",
      "country[T.Brazil]     0.303357  0.843996  0.505996  9.061854e-03    1.000000e+00\n",
      "country[T.Canada]     0.345266  1.035380  0.597898  6.637794e-02    1.000000e+00\n",
      "country[T.Japan]      0.321356  1.392304  0.668899  2.823246e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.296918  2.366952  0.838327  7.391365e-01    1.000000e+00\n",
      "country[T.Poland]     0.217077  2.397014  0.721344  5.939527e-01    1.000000e+00\n",
      "country[T.Romania]    0.688498  2.163458  1.220466  4.951727e-01    1.000000e+00\n",
      "country[T.Russia]     1.026218  2.219810  1.509308  3.648736e-02    1.000000e+00\n",
      "country[T.Serbia]     0.684922  2.101322  1.199684  5.243755e-01    1.000000e+00\n",
      "country[T.Thailand]   0.075414  2.980403  0.474092  4.262062e-01    1.000000e+00\n",
      "country[T.UK]         0.834529  2.113692  1.328133  2.313177e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   1.062375  1.873292  1.410723  1.740030e-02    1.000000e+00\n",
      "age_group             1.478438  1.903310  1.677476  9.972505e-16    2.782329e-13\n",
      "Running logistic regression with parameter age_group, signature SBS1536F\n",
      "** Warning: Covariate country, sig SBS1536F, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536F: 845\n",
      "All counts for signature SBS1536F: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.650125   1.488847  0.980584  9.000461e-01    1.000000e+00\n",
      "country[T.Brazil]     0.730990   3.070220  1.525799  2.512066e-01    1.000000e+00\n",
      "country[T.Canada]     1.451735   5.859918  2.941631  3.142781e-03    8.768358e-01\n",
      "country[T.Japan]      0.601385   4.635639  1.801791  2.695751e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.598493   7.838257  2.448556  1.903225e-01    1.000000e+00\n",
      "country[T.Poland]     0.002879   2.985384  0.372925  4.259788e-01    1.000000e+00\n",
      "country[T.Romania]    0.074851   1.200943  0.376888  1.053765e-01    1.000000e+00\n",
      "country[T.Russia]     0.910607   2.859994  1.604777  1.013274e-01    1.000000e+00\n",
      "country[T.Serbia]     0.280697   2.018363  0.818527  6.686595e-01    1.000000e+00\n",
      "country[T.Thailand]   0.283356  16.955946  2.935053  3.081039e-01    1.000000e+00\n",
      "country[T.UK]         0.770335   3.030079  1.547576  2.130729e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.483592   1.114048  0.735575  1.463890e-01    1.000000e+00\n",
      "age_group             0.820592   1.166748  0.977949  7.909046e-01    1.000000e+00\n",
      "Intercept             0.066269   0.240694  0.128793  9.824822e-12    2.741125e-09\n",
      "Running logistic regression with parameter age_group, signature SBS1536I\n",
      "** Warning: Covariate country, sig SBS1536I, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536I: 866\n",
      "All counts for signature SBS1536I: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.455980    1.761877    0.896424  5.191496e-01    1.000000e+00\n",
      "country[T.Brazil]      0.449911    7.811765    2.002186  2.635417e-01    1.000000e+00\n",
      "country[T.Canada]      0.002327    2.808592    0.307878  3.476195e-01    1.000000e+00\n",
      "country[T.Japan]       0.148192    8.165945    1.507134  4.742042e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.385815   23.051682    4.034247  1.617173e-01    1.000000e+00\n",
      "country[T.Poland]      0.017815   25.360794    2.431331  4.164332e-01    1.000000e+00\n",
      "country[T.Romania]    72.011661  655.727301  198.557016  3.109042e-39    8.674227e-37\n",
      "country[T.Russia]      0.050174    2.577508    0.497705  3.257964e-01    1.000000e+00\n",
      "country[T.Serbia]     29.923433  257.454199   80.465260  7.886361e-25    2.200295e-22\n",
      "country[T.Thailand]    7.987582  412.561786   53.436827  9.116935e-05    2.543625e-02\n",
      "country[T.UK]          0.001373    1.647099    0.181438  1.216908e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.578338    2.291137    1.144543  4.767930e-01    1.000000e+00\n",
      "age_group              1.568936    2.968918    2.128238  2.988216e-07    8.337123e-05\n",
      "Intercept              0.000745    0.011855    0.003273  1.557044e-29    4.344153e-27\n",
      "Running logistic regression with parameter age_group, signature DBS2\n",
      "Zero counts for signature DBS2: 560\n",
      "All counts for signature DBS2: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1267.8276 \n",
      "Link Function:         Logit             BIC:             -5264.1441\n",
      "Dependent Variable:    DBS2_bool         Log-Likelihood:  -619.91   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -652.90   \n",
      "No. Observations:      961               Deviance:        1239.8    \n",
      "Df Model:              13                Pearson chi2:    961.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.2612   0.2242 -5.6261 0.0000 -1.7006 -0.8219\n",
      "sex[T.Male]           0.3185   0.1454  2.1901 0.0285  0.0335  0.6035\n",
      "country[T.Brazil]    -0.2595   0.2532 -1.0249 0.3054 -0.7559  0.2368\n",
      "country[T.Canada]    -0.4443   0.2776 -1.6003 0.1095 -0.9885  0.0999\n",
      "country[T.Japan]     -0.9747   0.3951 -2.4670 0.0136 -1.7492 -0.2003\n",
      "country[T.Lithuania] -0.0472   0.5301 -0.0891 0.9290 -1.0863  0.9918\n",
      "country[T.Poland]    -0.3297   0.6008 -0.5488 0.5831 -1.5073  0.8478\n",
      "country[T.Romania]    0.4115   0.2888  1.4249 0.1542 -0.1545  0.9776\n",
      "country[T.Russia]    -0.2759   0.1987 -1.3882 0.1651 -0.6653  0.1136\n",
      "country[T.Serbia]     0.3193   0.2810  1.1362 0.2559 -0.2315  0.8701\n",
      "country[T.Thailand]  -0.0545   0.9395 -0.0580 0.9538 -1.8959  1.7870\n",
      "country[T.UK]        -0.2359   0.2332 -1.0114 0.3118 -0.6931  0.2212\n",
      "tobacco_ever[T.Yes]   0.7451   0.1443  5.1644 0.0000  0.4623  1.0279\n",
      "age_group             0.2456   0.0622  3.9511 0.0001  0.1238  0.3674\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.182571  0.439613  0.283303  1.843297e-08        0.000005\n",
      "sex[T.Male]           1.034029  1.828499  1.375035  2.851870e-02        1.000000\n",
      "country[T.Brazil]     0.469585  1.267200  0.771400  3.054220e-01        1.000000\n",
      "country[T.Canada]     0.372131  1.105013  0.641256  1.095291e-01        1.000000\n",
      "country[T.Japan]      0.173921  0.818467  0.377291  1.362659e-02        1.000000\n",
      "country[T.Lithuania]  0.337469  2.696150  0.953870  9.290140e-01        1.000000\n",
      "country[T.Poland]     0.221502  2.334587  0.719108  5.831244e-01        1.000000\n",
      "country[T.Romania]    0.856818  2.658029  1.509121  1.541841e-01        1.000000\n",
      "country[T.Russia]     0.514095  1.120337  0.758920  1.650877e-01        1.000000\n",
      "country[T.Serbia]     0.793350  2.387116  1.376161  2.558686e-01        1.000000\n",
      "country[T.Thailand]   0.150188  5.971288  0.947004  9.537823e-01        1.000000\n",
      "country[T.UK]         0.500047  1.247629  0.789856  3.118220e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.587752  2.795076  2.106630  2.411644e-07        0.000067\n",
      "age_group             1.131734  1.443946  1.278344  7.779611e-05        0.021705\n",
      "Running logistic regression with parameter age_group, signature DBS4\n",
      "** Warning: Covariate country, sig DBS4, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS4: 872\n",
      "All counts for signature DBS4: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.649549  1.667829  1.035505  8.324531e-01    1.000000e+00\n",
      "country[T.Brazil]     0.303536  1.526864  0.715680  3.916557e-01    1.000000e+00\n",
      "country[T.Canada]     0.379066  1.956257  0.903695  7.992965e-01    1.000000e+00\n",
      "country[T.Japan]      0.090128  1.489515  0.458738  2.107495e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.993862  9.126831  3.176710  5.029639e-02    1.000000e+00\n",
      "country[T.Poland]     0.104032  4.368183  0.983992  8.503749e-01    1.000000e+00\n",
      "country[T.Romania]    0.156817  1.286745  0.504041  1.593594e-01    1.000000e+00\n",
      "country[T.Russia]     0.427891  1.460657  0.798890  4.589157e-01    1.000000e+00\n",
      "country[T.Serbia]     0.058474  0.915967  0.292268  3.244994e-02    1.000000e+00\n",
      "country[T.Thailand]   0.004005  4.998769  0.533547  6.295572e-01    1.000000e+00\n",
      "country[T.UK]         0.228764  1.142442  0.537991  1.081504e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.605964  1.551418  0.969476  8.146180e-01    1.000000e+00\n",
      "age_group             1.068929  1.612481  1.308855  8.824145e-03    1.000000e+00\n",
      "Intercept             0.037209  0.158745  0.078770  1.267446e-14    3.536175e-12\n",
      "Running logistic regression with parameter age_group, signature DBS9\n",
      "** Warning: Covariate country, sig DBS9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS9: 926\n",
      "All counts for signature DBS9: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.749738   3.371281  1.547963  2.341993e-01    1.000000e+00\n",
      "country[T.Brazil]     0.326835   3.238345  1.119427  7.355736e-01    1.000000e+00\n",
      "country[T.Canada]     0.396630   4.001047  1.369066  5.874001e-01    1.000000e+00\n",
      "country[T.Japan]      0.002089   2.189339  0.271052  2.637499e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.228317  10.339700  2.204904  3.976216e-01    1.000000e+00\n",
      "country[T.Poland]     0.006551   7.346715  0.858920  7.718383e-01    1.000000e+00\n",
      "country[T.Romania]    0.001286   1.304205  0.165939  9.694659e-02    1.000000e+00\n",
      "country[T.Russia]     0.486975   2.901014  1.193206  6.497223e-01    1.000000e+00\n",
      "country[T.Serbia]     0.308079   3.928812  1.244799  6.756603e-01    1.000000e+00\n",
      "country[T.Thailand]   0.015996  21.869489  2.163723  5.914256e-01    1.000000e+00\n",
      "country[T.UK]         0.090800   1.646073  0.473951  2.451247e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.462226   1.904089  0.938001  7.321785e-01    1.000000e+00\n",
      "age_group             0.813545   1.504158  1.102133  5.018870e-01    1.000000e+00\n",
      "Intercept             0.009131   0.085815  0.029856  6.065105e-14    1.692164e-11\n",
      "Running logistic regression with parameter age_group, signature DBS78C\n",
      "** Warning: Covariate country, sig DBS78C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78C: 868\n",
      "All counts for signature DBS78C: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.944471  2.463965  1.509243  8.530063e-02    1.000000e+00\n",
      "country[T.Brazil]     0.469791  2.311767  1.082165  8.080540e-01    1.000000e+00\n",
      "country[T.Canada]     0.917660  3.943163  1.936455  8.141775e-02    1.000000e+00\n",
      "country[T.Japan]      0.111133  1.876989  0.569543  3.820167e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.100050  4.063697  0.938064  8.708911e-01    1.000000e+00\n",
      "country[T.Poland]     0.002811  2.924819  0.364332  4.121034e-01    1.000000e+00\n",
      "country[T.Romania]    0.290054  2.082964  0.845629  7.148150e-01    1.000000e+00\n",
      "country[T.Russia]     0.481277  1.786985  0.936193  8.202276e-01    1.000000e+00\n",
      "country[T.Serbia]     0.396915  2.528706  1.070058  8.515292e-01    1.000000e+00\n",
      "country[T.Thailand]   0.007001  8.821741  0.933259  8.831526e-01    1.000000e+00\n",
      "country[T.UK]         0.679407  2.659092  1.363132  3.711935e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.741667  1.838999  1.164592  4.967608e-01    1.000000e+00\n",
      "age_group             0.975353  1.446585  1.185350  8.694366e-02    1.000000e+00\n",
      "Intercept             0.024806  0.108780  0.053308  4.389740e-19    1.224737e-16\n",
      "Running logistic regression with parameter age_group, signature DBS78D\n",
      "** Warning: Covariate country, sig DBS78D, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78D: 903\n",
      "All counts for signature DBS78D: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                           2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.420844    1.508620   0.796372  4.455390e-01    1.000000e+00\n",
      "country[T.Brazil]      0.080349    4.073035   0.793587  6.771181e-01    1.000000e+00\n",
      "country[T.Canada]      0.302592    7.428085   1.726409  4.916779e-01    1.000000e+00\n",
      "country[T.Japan]       0.184085    9.882678   1.854295  4.801972e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.374169   21.591498   3.863117  1.970795e-01    1.000000e+00\n",
      "country[T.Poland]      0.739540   45.213474   7.766369  7.263108e-02    1.000000e+00\n",
      "country[T.Romania]    17.067865  127.592144  42.809054  9.416859e-20    2.627304e-17\n",
      "country[T.Russia]      0.311856    4.343047   1.193444  6.700521e-01    1.000000e+00\n",
      "country[T.Serbia]      3.926758   35.243258  11.100082  5.281057e-06    1.473415e-03\n",
      "country[T.Thailand]    0.019338   31.128863   2.698326  5.051161e-01    1.000000e+00\n",
      "country[T.UK]          0.181128    4.372493   1.026747  7.443558e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.413415    1.558633   0.804893  4.666517e-01    1.000000e+00\n",
      "age_group              1.172996    2.079095   1.548845  1.731261e-03    4.830217e-01\n",
      "Intercept              0.002633    0.030726   0.009823  8.827208e-23    2.462791e-20\n",
      "Running logistic regression with parameter age_group, signature ID1\n",
      "Zero counts for signature ID1: 481\n",
      "All counts for signature ID1: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1311.7455 \n",
      "Link Function:         Logit             BIC:             -5220.2262\n",
      "Dependent Variable:    ID1_bool          Log-Likelihood:  -641.87   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1283.7    \n",
      "Df Model:              13                Pearson chi2:    963.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.4605   0.2122 -2.1703 0.0300 -0.8763 -0.0446\n",
      "sex[T.Male]           0.1691   0.1417  1.1936 0.2326 -0.1086  0.4468\n",
      "country[T.Brazil]     0.1099   0.2432  0.4518 0.6514 -0.3667  0.5864\n",
      "country[T.Canada]     0.4379   0.2753  1.5906 0.1117 -0.1017  0.9776\n",
      "country[T.Japan]      0.0259   0.3642  0.0711 0.9433 -0.6879  0.7397\n",
      "country[T.Lithuania] -0.9043   0.5605 -1.6133 0.1067 -2.0028  0.1943\n",
      "country[T.Poland]    -1.5560   0.7837 -1.9855 0.0471 -3.0921 -0.0200\n",
      "country[T.Romania]   -0.4125   0.2872 -1.4365 0.1509 -0.9753  0.1503\n",
      "country[T.Russia]     0.2438   0.1915  1.2731 0.2030 -0.1316  0.6192\n",
      "country[T.Serbia]    -0.3541   0.2833 -1.2497 0.2114 -0.9094  0.2012\n",
      "country[T.Thailand]   1.1562   1.1321  1.0213 0.3071 -1.0627  3.3750\n",
      "country[T.UK]        -0.1919   0.2285 -0.8397 0.4011 -0.6398  0.2560\n",
      "tobacco_ever[T.Yes]  -0.3293   0.1405 -2.3440 0.0191 -0.6046 -0.0539\n",
      "age_group             0.2641   0.0603  4.3832 0.0000  0.1460  0.3823\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "Intercept             0.416317   0.956354  0.630989  0.029984        1.000000\n",
      "sex[T.Male]           0.897117   1.563251  1.184238  0.232629        1.000000\n",
      "country[T.Brazil]     0.692995   1.797570  1.116113  0.651436        1.000000\n",
      "country[T.Canada]     0.903317   2.657947  1.549506  0.111690        1.000000\n",
      "country[T.Japan]      0.502629   2.095352  1.026248  0.943286        1.000000\n",
      "country[T.Lithuania]  0.134958   1.214431  0.404842  0.106670        1.000000\n",
      "country[T.Poland]     0.045406   0.980210  0.210969  0.047092        1.000000\n",
      "country[T.Romania]    0.377069   1.162201  0.661990  0.150858        1.000000\n",
      "country[T.Russia]     0.876731   1.857454  1.276122  0.202990        1.000000\n",
      "country[T.Serbia]     0.402782   1.222912  0.701831  0.211413        1.000000\n",
      "country[T.Thailand]   0.345534  29.223537  3.177690  0.307125        1.000000\n",
      "country[T.UK]         0.527411   1.291742  0.825396  0.401060        1.000000\n",
      "tobacco_ever[T.Yes]   0.546296   0.947484  0.719448  0.019079        1.000000\n",
      "age_group             1.157230   1.465580  1.302310  0.000012        0.003263\n",
      "Running logistic regression with parameter age_group, signature ID2\n",
      "** Warning: Covariate country, sig ID2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID2: 945\n",
      "All counts for signature ID2: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.374218   3.023756  1.029775  5.832641e-01        1.000000\n",
      "country[T.Brazil]     0.001731   2.056395  0.228403  1.739022e-01        1.000000\n",
      "country[T.Canada]     0.002556   3.068373  0.337809  3.919891e-01        1.000000\n",
      "country[T.Japan]      0.637901  16.462365  3.711590  1.057310e-01        1.000000\n",
      "country[T.Lithuania]  0.010753  13.726040  1.437142  5.163704e-01        1.000000\n",
      "country[T.Poland]     0.012237  16.077832  1.643945  5.033930e-01        1.000000\n",
      "country[T.Romania]    0.324377   7.820795  1.837371  3.283197e-01        1.000000\n",
      "country[T.Russia]     0.225203   3.170622  0.866209  5.396545e-01        1.000000\n",
      "country[T.Serbia]     0.002397   2.872017  0.316777  2.805266e-01        1.000000\n",
      "country[T.Thailand]   0.029905  49.227238  4.189119  3.224307e-01        1.000000\n",
      "country[T.UK]         0.347715   5.851776  1.525801  3.875624e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.252441   2.027974  0.724593  3.989168e-01        1.000000\n",
      "age_group             0.557156   1.322096  0.856259  3.622407e-01        1.000000\n",
      "Intercept             0.007881   0.130438  0.036090  5.625232e-09        0.000002\n",
      "Running logistic regression with parameter age_group, signature ID3\n",
      "** Warning: Covariate country, sig ID3, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID3: 920\n",
      "All counts for signature ID3: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.457623   1.771572  0.890208  6.595917e-01    1.000000e+00\n",
      "country[T.Brazil]     0.246893   3.268034  1.007879  7.330826e-01    1.000000e+00\n",
      "country[T.Canada]     0.156369   3.047160  0.834773  7.872298e-01    1.000000e+00\n",
      "country[T.Japan]      0.309735   6.343414  1.684785  4.578241e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.005909   6.650686  0.775214  7.154907e-01    1.000000e+00\n",
      "country[T.Poland]     0.302147  14.969733  2.995547  2.671726e-01    1.000000e+00\n",
      "country[T.Romania]    2.329268  14.712246  5.784115  1.978430e-04    5.519820e-02\n",
      "country[T.Russia]     0.309987   2.541188  0.913985  7.302768e-01    1.000000e+00\n",
      "country[T.Serbia]     0.065364   2.778826  0.619049  5.185249e-01    1.000000e+00\n",
      "country[T.Thailand]   0.818659  56.997385  8.897890  6.399355e-02    1.000000e+00\n",
      "country[T.UK]         0.379409   3.447739  1.200193  6.300751e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.837451   3.309871  1.643232  1.397033e-01    1.000000e+00\n",
      "age_group             0.843451   1.505120  1.122191  4.035171e-01    1.000000e+00\n",
      "Intercept             0.008141   0.069129  0.025179  9.901916e-17    2.762635e-14\n",
      "Running logistic regression with parameter age_group, signature ID5\n",
      "Zero counts for signature ID5: 481\n",
      "All counts for signature ID5: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1146.5794 \n",
      "Link Function:         Logit             BIC:             -5385.3923\n",
      "Dependent Variable:    ID5_bool          Log-Likelihood:  -559.29   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1118.6    \n",
      "Df Model:              13                Pearson chi2:    964.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.4654   0.2410 -6.0807 0.0000 -1.9378 -0.9931\n",
      "sex[T.Male]           0.5113   0.1562  3.2737 0.0011  0.2052  0.8174\n",
      "country[T.Brazil]    -0.9738   0.2698 -3.6096 0.0003 -1.5026 -0.4451\n",
      "country[T.Canada]    -0.8139   0.2948 -2.7609 0.0058 -1.3917 -0.2361\n",
      "country[T.Japan]     -2.7842   0.5000 -5.5679 0.0000 -3.7643 -1.8041\n",
      "country[T.Lithuania] -0.0867   0.5632 -0.1539 0.8777 -1.1905  1.0172\n",
      "country[T.Poland]    -0.9973   0.6647 -1.5003 0.1335 -2.3002  0.3056\n",
      "country[T.Romania]    0.3524   0.3281  1.0739 0.2828 -0.2907  0.9954\n",
      "country[T.Russia]    -0.4554   0.2063 -2.2071 0.0273 -0.8598 -0.0510\n",
      "country[T.Serbia]    -0.0649   0.3026 -0.2144 0.8303 -0.6580  0.5282\n",
      "country[T.Thailand]  -2.3628   1.1588 -2.0391 0.0414 -4.6339 -0.0917\n",
      "country[T.UK]        -0.3619   0.2480 -1.4594 0.1445 -0.8479  0.1241\n",
      "tobacco_ever[T.Yes]  -0.0790   0.1531 -0.5160 0.6058 -0.3790  0.2211\n",
      "age_group             0.8113   0.0734 11.0519 0.0000  0.6674  0.9552\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.144023  0.370432  0.230977  1.196802e-09    3.339077e-07\n",
      "sex[T.Male]           1.227731  2.264541  1.667407  1.061676e-03    2.962077e-01\n",
      "country[T.Brazil]     0.222549  0.640784  0.377632  3.066150e-04    8.554558e-02\n",
      "country[T.Canada]     0.248641  0.789682  0.443111  5.763978e-03    1.000000e+00\n",
      "country[T.Japan]      0.023184  0.164618  0.061778  2.578485e-08    7.193972e-06\n",
      "country[T.Lithuania]  0.304065  2.765335  0.916974  8.776876e-01    1.000000e+00\n",
      "country[T.Poland]     0.100244  1.357378  0.368875  1.335371e-01    1.000000e+00\n",
      "country[T.Romania]    0.747740  2.705839  1.422415  2.828483e-01    1.000000e+00\n",
      "country[T.Russia]     0.423255  0.950296  0.634207  2.731048e-02    1.000000e+00\n",
      "country[T.Serbia]     0.517907  1.695914  0.937190  8.302578e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009717  0.912377  0.094155  4.143976e-02    1.000000e+00\n",
      "country[T.UK]         0.428304  1.132166  0.696356  1.444598e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.684513  1.247392  0.924044  6.058474e-01    1.000000e+00\n",
      "age_group             1.949190  2.599096  2.250807  2.145371e-28    5.985584e-26\n",
      "Running logistic regression with parameter age_group, signature ID8\n",
      "Zero counts for signature ID8: 260\n",
      "All counts for signature ID8: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1055.1280 \n",
      "Link Function:         Logit             BIC:             -5476.8438\n",
      "Dependent Variable:    ID8_bool          Log-Likelihood:  -513.56   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -561.04   \n",
      "No. Observations:      961               Deviance:        1027.1    \n",
      "Df Model:              13                Pearson chi2:    956.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept             0.0689   0.2399  0.2872 0.7739 -0.4013  0.5391\n",
      "sex[T.Male]           0.4162   0.1614  2.5777 0.0099  0.0997  0.7326\n",
      "country[T.Brazil]    -0.7763   0.2738 -2.8354 0.0046 -1.3130 -0.2397\n",
      "country[T.Canada]    -0.2465   0.3312 -0.7443 0.4567 -0.8956  0.4026\n",
      "country[T.Japan]     -1.5096   0.3958 -3.8145 0.0001 -2.2853 -0.7340\n",
      "country[T.Lithuania]  0.4631   0.7839  0.5908 0.5547 -1.0733  1.9996\n",
      "country[T.Poland]    -0.2932   0.6458 -0.4540 0.6498 -1.5589  0.9725\n",
      "country[T.Romania]   -0.6716   0.3264 -2.0574 0.0396 -1.3113 -0.0318\n",
      "country[T.Russia]    -0.1172   0.2280 -0.5140 0.6072 -0.5642  0.3297\n",
      "country[T.Serbia]    -0.7006   0.3075 -2.2788 0.0227 -1.3033 -0.0980\n",
      "country[T.Thailand]  -1.2885   0.9449 -1.3636 0.1727 -3.1406  0.5635\n",
      "country[T.UK]        -0.2436   0.2818 -0.8644 0.3874 -0.7960  0.3088\n",
      "tobacco_ever[T.Yes]  -0.0028   0.1613 -0.0176 0.9860 -0.3189  0.3133\n",
      "age_group             0.5556   0.0716  7.7645 0.0000  0.4153  0.6958\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.669452  1.714475  1.071335  7.739414e-01    1.000000e+00\n",
      "sex[T.Male]           1.104877  2.080432  1.516121  9.945063e-03    1.000000e+00\n",
      "country[T.Brazil]     0.269022  0.786869  0.460093  4.576735e-03    1.000000e+00\n",
      "country[T.Canada]     0.408360  1.495734  0.781536  4.567074e-01    1.000000e+00\n",
      "country[T.Japan]      0.101744  0.480008  0.220994  1.364481e-04    3.806901e-02\n",
      "country[T.Lithuania]  0.341862  7.386022  1.589025  5.546745e-01    1.000000e+00\n",
      "country[T.Poland]     0.210375  2.644453  0.745873  6.498000e-01    1.000000e+00\n",
      "country[T.Romania]    0.269459  0.968702  0.510906  3.964978e-02    1.000000e+00\n",
      "country[T.Russia]     0.568825  1.390615  0.889391  6.072450e-01    1.000000e+00\n",
      "country[T.Serbia]     0.271646  0.906629  0.496268  2.268052e-02    1.000000e+00\n",
      "country[T.Thailand]   0.043258  1.756766  0.275672  1.726786e-01    1.000000e+00\n",
      "country[T.UK]         0.451151  1.361729  0.783802  3.873779e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.726928  1.367866  0.997166  9.859598e-01    1.000000e+00\n",
      "age_group             1.514874  2.005347  1.742943  8.199492e-15    2.287658e-12\n",
      "Running logistic regression with parameter age_group, signature ID9\n",
      "** Warning: Covariate country, sig ID9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter age_group, sig ID9, perfect or near-perfect separation for category 0. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID9: 957\n",
      "All counts for signature ID9: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.185678   12.336978   1.196410  6.612038e-01        1.000000\n",
      "country[T.Brazil]     0.004189    7.302258   0.581571  4.784588e-01        1.000000\n",
      "country[T.Canada]     0.160117   14.016129   1.796904  5.479595e-01        1.000000\n",
      "country[T.Japan]      0.007389   14.299791   1.055337  6.345034e-01        1.000000\n",
      "country[T.Lithuania]  0.022404   42.729347   3.174849  4.062195e-01        1.000000\n",
      "country[T.Poland]     0.025651   50.730402   3.667075  3.548069e-01        1.000000\n",
      "country[T.Romania]    0.005970   10.370154   0.828074  5.406973e-01        1.000000\n",
      "country[T.Russia]     0.002148    4.112552   0.304443  3.377300e-01        1.000000\n",
      "country[T.Serbia]     0.233843   21.847897   2.686228  3.240590e-01        1.000000\n",
      "country[T.Thailand]   0.066857  173.327968  10.085983  2.187983e-01        1.000000\n",
      "country[T.UK]         0.002788    4.949229   0.389226  3.688637e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.308024   23.088732   2.059376  3.443659e-01        1.000000\n",
      "age_group             0.552409    3.278848   1.264666  4.922317e-01        1.000000\n",
      "Intercept             0.000089    0.053370   0.003927  3.322967e-08        0.000009\n",
      "Running logistic regression with parameter age_group, signature ID11\n",
      "** Warning: Covariate country, sig ID11, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter age_group, sig ID11, perfect or near-perfect separation for category 0. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID11: 953\n",
      "All counts for signature ID11: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.137026   2.422243  0.613014  3.991068e-01         1.00000\n",
      "country[T.Brazil]     0.002927   4.255458  0.397376  4.272206e-01         1.00000\n",
      "country[T.Canada]     0.203576  14.197568  2.181766  4.204162e-01         1.00000\n",
      "country[T.Japan]      0.009958  15.222180  1.365803  5.958346e-01         1.00000\n",
      "country[T.Lithuania]  0.012295  19.378197  1.699896  5.265037e-01         1.00000\n",
      "country[T.Poland]     0.033881  65.409000  4.839885  3.259077e-01         1.00000\n",
      "country[T.Romania]    0.003838   5.501099  0.519624  5.142460e-01         1.00000\n",
      "country[T.Russia]     0.218359   5.569579  1.097867  6.164876e-01         1.00000\n",
      "country[T.Serbia]     0.003971   5.730619  0.538336  4.962380e-01         1.00000\n",
      "country[T.Thailand]   0.021493  43.116130  3.111328  4.082013e-01         1.00000\n",
      "country[T.UK]         0.116350   7.738644  1.226265  6.247734e-01         1.00000\n",
      "tobacco_ever[T.Yes]   0.023986   1.213531  0.235698  7.455495e-02         1.00000\n",
      "age_group             0.679254   2.379662  1.234258  4.093597e-01         1.00000\n",
      "Intercept             0.001768   0.114194  0.018709  2.138700e-07         0.00006\n",
      "Running logistic regression with parameter age_group, signature ID12\n",
      "** Warning: Covariate country, sig ID12, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter age_group, sig ID12, perfect or near-perfect separation for category 3. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID12: 954\n",
      "All counts for signature ID12: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR   p-value  p-value (corr)\n",
      "sex[T.Male]           0.128605      2.395574   0.576850  0.349191        1.000000\n",
      "country[T.Brazil]     1.333380   1956.675846  14.393065  0.019249        1.000000\n",
      "country[T.Canada]     0.022074    780.105632   4.148480  0.482822        1.000000\n",
      "country[T.Japan]      0.051370   1823.303871   9.672546  0.196098        1.000000\n",
      "country[T.Lithuania]  0.106766   3905.391477  20.388273  0.133156        1.000000\n",
      "country[T.Poland]     0.065907   2454.444739  12.738588  0.178832        1.000000\n",
      "country[T.Romania]    0.020815    725.687006   3.887272  0.245807        1.000000\n",
      "country[T.Russia]     0.529068    756.928969   5.593369  0.131457        1.000000\n",
      "country[T.Serbia]     0.014123    492.815566   2.638875  0.352869        1.000000\n",
      "country[T.Thailand]   0.322065  14731.612912  68.472752  0.066981        1.000000\n",
      "country[T.UK]         0.426776   1197.409658   8.142941  0.099089        1.000000\n",
      "tobacco_ever[T.Yes]   0.117015      2.788629   0.653089  0.450095        1.000000\n",
      "age_group             0.205483      0.910157   0.461979  0.020336        1.000000\n",
      "Intercept             0.000091      0.124469   0.012695  0.000001        0.000389\n",
      "Running logistic regression with parameter age_group, signature ID83C\n",
      "** Warning: Covariate country, sig ID83C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID83C: 944\n",
      "All counts for signature ID83C: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                           2.5%         97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.386814      3.584608    1.145584  6.504633e-01    1.000000e+00\n",
      "country[T.Brazil]      0.016332    560.359192    3.025082  4.763738e-01    1.000000e+00\n",
      "country[T.Canada]      0.579863   1630.616888   11.084654  1.043989e-01    1.000000e+00\n",
      "country[T.Japan]       0.033745   1185.169352    6.324505  3.132266e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.076864   2723.726834   14.469838  1.914579e-01    1.000000e+00\n",
      "country[T.Poland]      0.137312   4949.091905   26.048856  1.314863e-01    1.000000e+00\n",
      "country[T.Romania]    16.640395  16612.852435  129.032687  1.952549e-10    5.447613e-08\n",
      "country[T.Russia]      0.007819    271.118071    1.455904  6.151711e-01    1.000000e+00\n",
      "country[T.Serbia]      3.036852   4362.863473   32.198499  2.387989e-03    6.662489e-01\n",
      "country[T.Thailand]    0.176828   6974.318483   35.105375  1.165617e-01    1.000000e+00\n",
      "country[T.UK]          0.012284    422.343117    2.277772  5.064456e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.238367      2.285891    0.754108  4.981554e-01    1.000000e+00\n",
      "age_group              0.923130      2.470521    1.472286  9.588224e-02    1.000000e+00\n",
      "Intercept              0.000006      0.009959    0.000872  4.248561e-18    1.185349e-15\n",
      "Running logistic regression with parameter age_group, signature SBS_burden\n",
      "** Warning: Covariate country, sig SBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS_burden: 481\n",
      "All counts for signature SBS_burden: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.424533   2.708518  1.959239  3.148233e-05    8.783569e-03\n",
      "country[T.Brazil]     0.321739   0.952125  0.555417  3.242355e-02    1.000000e+00\n",
      "country[T.Canada]     0.176452   0.596007  0.326817  2.438769e-04    6.804166e-02\n",
      "country[T.Japan]      0.187811   0.972171  0.426904  4.265378e-02    1.000000e+00\n",
      "country[T.Lithuania]  0.285147   2.541897  0.838477  7.416656e-01    1.000000e+00\n",
      "country[T.Poland]     0.193742   2.584661  0.732695  6.190425e-01    1.000000e+00\n",
      "country[T.Romania]    3.496526  19.547025  7.842931  7.955217e-08    2.219506e-05\n",
      "country[T.Russia]     0.487470   1.127575  0.741804  1.619068e-01    1.000000e+00\n",
      "country[T.Serbia]     0.820998   2.823028  1.515669  1.839464e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000263   0.363622  0.035843  2.445103e-03    6.821836e-01\n",
      "country[T.UK]         0.459913   1.252598  0.759114  2.789023e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.842714   1.575609  1.151715  3.679264e-01    1.000000e+00\n",
      "age_group             2.394166   3.292596  2.796220  7.270876e-49    2.028574e-46\n",
      "Intercept             0.056066   0.158684  0.095329  9.447907e-22    2.635966e-19\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running logistic regression with parameter age_group, signature DBS_burden\n",
      "** Warning: Covariate country, sig DBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS_burden: 530\n",
      "All counts for signature DBS_burden: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.161888  2.185593  1.590682  3.704111e-03    1.000000e+00\n",
      "country[T.Brazil]     0.379947  1.069320  0.639249  8.832308e-02    1.000000e+00\n",
      "country[T.Canada]     0.230532  0.736796  0.414998  2.588723e-03    7.222538e-01\n",
      "country[T.Japan]      0.091265  0.468360  0.211999  1.001378e-04    2.793843e-02\n",
      "country[T.Lithuania]  0.539177  5.032744  1.558902  4.183740e-01    1.000000e+00\n",
      "country[T.Poland]     0.198378  2.262121  0.694182  5.410311e-01    1.000000e+00\n",
      "country[T.Romania]    1.810707  8.689513  3.819218  2.976646e-04    8.304843e-02\n",
      "country[T.Russia]     0.502772  1.159735  0.764028  2.060447e-01    1.000000e+00\n",
      "country[T.Serbia]     0.747300  2.418484  1.339524  3.265771e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000354  0.454511  0.047439  4.866875e-03    1.000000e+00\n",
      "country[T.UK]         0.622362  1.825153  1.061828  8.208100e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.801407  1.490542  1.092489  5.656693e-01    1.000000e+00\n",
      "age_group             1.784440  2.384883  2.057017  3.323695e-26    9.273110e-24\n",
      "Intercept             0.123892  0.325875  0.202480  1.402747e-11    3.913663e-09\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running logistic regression with parameter age_group, signature ID_burden\n",
      "Zero counts for signature ID_burden: 482\n",
      "All counts for signature ID_burden: 961\n",
      "Covariates used: ['sex', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1112.6005 \n",
      "Link Function:         Logit             BIC:             -5419.3712\n",
      "Dependent Variable:    ID_burden_bool    Log-Likelihood:  -542.30   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1084.6    \n",
      "Df Model:              13                Pearson chi2:    959.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.7446   0.2496 -6.9894 0.0000 -2.2338 -1.2554\n",
      "sex[T.Male]           0.6143   0.1602  3.8349 0.0001  0.3003  0.9282\n",
      "country[T.Brazil]    -0.8240   0.2725 -3.0236 0.0025 -1.3582 -0.2899\n",
      "country[T.Canada]    -1.0580   0.3041 -3.4795 0.0005 -1.6539 -0.4620\n",
      "country[T.Japan]     -2.6428   0.4840 -5.4600 0.0000 -3.5915 -1.6941\n",
      "country[T.Lithuania] -0.6290   0.5554 -1.1324 0.2575 -1.7176  0.4596\n",
      "country[T.Poland]    -0.9165   0.6760 -1.3558 0.1752 -2.2415  0.4084\n",
      "country[T.Romania]    0.8116   0.3529  2.3000 0.0214  0.1200  1.5033\n",
      "country[T.Russia]    -0.3323   0.2098 -1.5839 0.1132 -0.7435  0.0789\n",
      "country[T.Serbia]    -0.1166   0.3088 -0.3776 0.7057 -0.7218  0.4886\n",
      "country[T.Thailand]  -2.3970   1.1676 -2.0529 0.0401 -4.6856 -0.1085\n",
      "country[T.UK]        -0.2728   0.2526 -1.0801 0.2801 -0.7679  0.2223\n",
      "tobacco_ever[T.Yes]  -0.1641   0.1560 -1.0517 0.2929 -0.4698  0.1417\n",
      "age_group             0.9136   0.0770 11.8696 0.0000  0.7627  1.0644\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.107118  0.284966  0.174715  2.760020e-12    7.700455e-10\n",
      "sex[T.Male]           1.350310  2.530076  1.848347  1.256255e-04    3.504950e-02\n",
      "country[T.Brazil]     0.257129  0.748353  0.438661  2.497618e-03    6.968353e-01\n",
      "country[T.Canada]     0.191296  0.630008  0.347157  5.024391e-04    1.401805e-01\n",
      "country[T.Japan]      0.027557  0.183759  0.071161  4.761125e-08    1.328354e-05\n",
      "country[T.Lithuania]  0.179494  1.583516  0.533134  2.574595e-01    1.000000e+00\n",
      "country[T.Poland]     0.106297  1.504476  0.399901  1.751699e-01    1.000000e+00\n",
      "country[T.Romania]    1.127479  4.496293  2.251549  2.144958e-02    1.000000e+00\n",
      "country[T.Russia]     0.475434  1.082103  0.717265  1.132222e-01    1.000000e+00\n",
      "country[T.Serbia]     0.485887  1.630006  0.889943  7.057153e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009227  0.897172  0.090986  4.008294e-02    1.000000e+00\n",
      "country[T.UK]         0.463986  1.248885  0.761226  2.801020e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.625135  1.152204  0.848695  2.929305e-01    1.000000e+00\n",
      "age_group             2.144108  2.899188  2.493225  1.701868e-32    4.748212e-30\n",
      "Using below/above median model for signature SBS1, its frequency is 0.77\n",
      "Using below/above median model for signature SBS1536A, its frequency is 0.86\n",
      "Using below/above median model for signature SBS1536B, its frequency is 0.90\n",
      "Using below/above median model for signature ID1, its frequency is 0.85\n",
      "Using below/above median model for signature ID5, its frequency is 0.93\n",
      "Running logistic regression with parameter sex, signature SBS1\n",
      "Zero counts for signature SBS1: 481\n",
      "All counts for signature SBS1: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1261.1332 \n",
      "Link Function:         Logit             BIC:             -5270.8385\n",
      "Dependent Variable:    SBS1_bool         Log-Likelihood:  -616.57   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1233.1    \n",
      "Df Model:              13                Pearson chi2:    959.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.9710   0.2209 -4.3948 0.0000 -1.4040 -0.5380\n",
      "sex[T.Male]          -0.0397   0.1451 -0.2732 0.7847 -0.3241  0.2448\n",
      "country[T.Brazil]     0.1257   0.2476  0.5080 0.6115 -0.3595  0.6110\n",
      "country[T.Canada]     0.7523   0.2857  2.6332 0.0085  0.1923  1.3123\n",
      "country[T.Japan]      0.2846   0.3737  0.7615 0.4463 -0.4478  1.0170\n",
      "country[T.Lithuania]  0.4754   0.5427  0.8759 0.3811 -0.5883  1.5391\n",
      "country[T.Poland]    -0.5277   0.6304 -0.8371 0.4025 -1.7633  0.7078\n",
      "country[T.Romania]   -1.0465   0.3173 -3.2978 0.0010 -1.6684 -0.4245\n",
      "country[T.Russia]     0.4980   0.1954  2.5491 0.0108  0.1151  0.8809\n",
      "country[T.Serbia]    -0.7507   0.3071 -2.4444 0.0145 -1.3526 -0.1488\n",
      "country[T.Thailand]   0.1631   0.9369  0.1741 0.8618 -1.6733  1.9995\n",
      "country[T.UK]         0.4062   0.2338  1.7370 0.0824 -0.0521  0.8645\n",
      "tobacco_ever[T.Yes]  -0.0132   0.1443 -0.0918 0.9269 -0.2961  0.2696\n",
      "age_group             0.4416   0.0635  6.9582 0.0000  0.3172  0.5660\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.245601  0.583940  0.378703  1.108950e-05    3.093971e-03\n",
      "sex[T.Male]           0.723148  1.277402  0.961120  7.846898e-01    1.000000e+00\n",
      "country[T.Brazil]     0.698058  1.842186  1.133998  6.114804e-01    1.000000e+00\n",
      "country[T.Canada]     1.212095  3.714866  2.121973  8.458929e-03    1.000000e+00\n",
      "country[T.Japan]      0.639006  2.764914  1.329209  4.463316e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.555246  4.660370  1.608617  3.810815e-01    1.000000e+00\n",
      "country[T.Poland]     0.171483  2.029557  0.589945  4.025131e-01    1.000000e+00\n",
      "country[T.Romania]    0.188550  0.654086  0.351180  9.745417e-04    2.718971e-01\n",
      "country[T.Russia]     1.121984  2.413139  1.645449  1.080012e-02    1.000000e+00\n",
      "country[T.Serbia]     0.258574  0.861775  0.472052  1.451066e-02    1.000000e+00\n",
      "country[T.Thailand]   0.187634  7.385229  1.177165  8.617973e-01    1.000000e+00\n",
      "country[T.UK]         0.949202  2.373826  1.501080  8.238373e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.743735  1.309426  0.986846  9.268897e-01    1.000000e+00\n",
      "age_group             1.373265  1.761123  1.555149  3.445975e-12    9.614271e-10\n",
      "Running logistic regression with parameter sex, signature SBS2\n",
      "** Warning: Covariate country, sig SBS2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS2: 955\n",
      "All counts for signature SBS2: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%       97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.252354    7.165139  1.202918  5.572848e-01        1.000000\n",
      "country[T.Brazil]     0.003978    6.902463  0.551651  4.528026e-01        1.000000\n",
      "country[T.Canada]     0.004983    8.790606  0.693510  7.528573e-01        1.000000\n",
      "country[T.Japan]      0.009643   17.752792  1.357391  5.292038e-01        1.000000\n",
      "country[T.Lithuania]  0.022258   41.083299  3.130414  3.621502e-01        1.000000\n",
      "country[T.Poland]     0.027188   52.420407  3.855471  3.335520e-01        1.000000\n",
      "country[T.Romania]    0.005770    9.978882  0.799465  5.178005e-01        1.000000\n",
      "country[T.Russia]     0.177544    8.351672  1.213727  5.387204e-01        1.000000\n",
      "country[T.Serbia]     0.005567    9.879966  0.776013  5.380777e-01        1.000000\n",
      "country[T.Thailand]   0.060409  148.684751  9.072362  2.065472e-01        1.000000\n",
      "country[T.UK]         0.343534   14.921274  2.265973  2.726580e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.191248    4.822484  0.945485  5.254771e-01        1.000000\n",
      "age_group             0.505794    2.116040  1.019687  5.550151e-01        1.000000\n",
      "Intercept             0.000641    0.075838  0.009654  3.157254e-08        0.000009\n",
      "Running logistic regression with parameter sex, signature SBS4\n",
      "Zero counts for signature SBS4: 410"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "All counts for signature SBS4: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1253.1215 \n",
      "Link Function:         Logit             BIC:             -5278.8502\n",
      "Dependent Variable:    SBS4_bool         Log-Likelihood:  -612.56   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -655.73   \n",
      "No. Observations:      961               Deviance:        1225.1    \n",
      "Df Model:              13                Pearson chi2:    956.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.2898   0.2182 -1.3279 0.1842 -0.7174  0.1379\n",
      "sex[T.Male]          -0.2000   0.1457 -1.3726 0.1699 -0.4855  0.0856\n",
      "country[T.Brazil]    -0.6365   0.2512 -2.5340 0.0113 -1.1288 -0.1442\n",
      "country[T.Canada]    -0.5623   0.2772 -2.0285 0.0425 -1.1056 -0.0190\n",
      "country[T.Japan]      0.8305   0.4738  1.7530 0.0796 -0.0980  1.7590\n",
      "country[T.Lithuania] -0.1917   0.5429 -0.3531 0.7240 -1.2558  0.8724\n",
      "country[T.Poland]     0.7214   0.6908  1.0443 0.2964 -0.6326  2.0754\n",
      "country[T.Romania]   -0.9480   0.2913 -3.2545 0.0011 -1.5190 -0.3771\n",
      "country[T.Russia]    -0.3595   0.1957 -1.8364 0.0663 -0.7431  0.0242\n",
      "country[T.Serbia]    -0.5464   0.2831 -1.9302 0.0536 -1.1013  0.0084\n",
      "country[T.Thailand]  -0.3463   0.9311 -0.3719 0.7099 -2.1711  1.4786\n",
      "country[T.UK]        -0.0538   0.2434 -0.2211 0.8250 -0.5308  0.4232\n",
      "tobacco_ever[T.Yes]   0.6616   0.1453  4.5541 0.0000  0.3769  0.9463\n",
      "age_group             0.3406   0.0626  5.4412 0.0000  0.2179  0.4633\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.488003  1.147892  0.748448  1.842192e-01        1.000000\n",
      "sex[T.Male]           0.615366  1.089350  0.818748  1.698855e-01        1.000000\n",
      "country[T.Brazil]     0.323418  0.865727  0.529142  1.127735e-02        1.000000\n",
      "country[T.Canada]     0.331017  0.981188  0.569904  4.251165e-02        1.000000\n",
      "country[T.Japan]      0.906620  5.806870  2.294477  7.959572e-02        1.000000\n",
      "country[T.Lithuania]  0.284835  2.392621  0.825532  7.239855e-01        1.000000\n",
      "country[T.Poland]     0.531202  7.968059  2.057341  2.963683e-01        1.000000\n",
      "country[T.Romania]    0.218935  0.685851  0.387501  1.136001e-03        0.316944\n",
      "country[T.Russia]     0.475620  1.024477  0.698042  6.629485e-02        1.000000\n",
      "country[T.Serbia]     0.332439  1.008465  0.579010  5.358327e-02        1.000000\n",
      "country[T.Thailand]   0.114050  4.386586  0.707313  7.099492e-01        1.000000\n",
      "country[T.UK]         0.588115  1.526834  0.947604  8.249923e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.457702  2.576233  1.937881  5.261225e-06        0.001468\n",
      "age_group             1.243511  1.589371  1.405845  5.291094e-08        0.000015\n",
      "Running logistic regression with parameter sex, signature SBS5\n",
      "Zero counts for signature SBS5: 884\n",
      "All counts for signature SBS5: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             510.2984  \n",
      "Link Function:         Logit             BIC:             -6021.6734\n",
      "Dependent Variable:    SBS5_bool         Log-Likelihood:  -241.15   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -268.19   \n",
      "No. Observations:      961               Deviance:        482.30    \n",
      "Df Model:              13                Pearson chi2:    941.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -3.6946   0.4520 -8.1732 0.0000 -4.5806 -2.8086\n",
      "sex[T.Male]          -0.1023   0.2673 -0.3829 0.7018 -0.6261  0.4215\n",
      "country[T.Brazil]     0.2327   0.3786  0.6146 0.5388 -0.5093  0.9747\n",
      "country[T.Canada]    -0.6398   0.5130 -1.2470 0.2124 -1.6453  0.3658\n",
      "country[T.Japan]      0.1984   0.5090  0.3897 0.6968 -0.7993  1.1960\n",
      "country[T.Lithuania]  0.5316   0.6869  0.7739 0.4390 -0.8148  1.8780\n",
      "country[T.Poland]    -0.2157   1.0868 -0.1985 0.8427 -2.3458  1.9143\n",
      "country[T.Romania]   -1.4158   0.7513 -1.8843 0.0595 -2.8884  0.0568\n",
      "country[T.Russia]    -1.1240   0.4690 -2.3966 0.0165 -2.0432 -0.2048\n",
      "country[T.Serbia]    -0.4719   0.5621 -0.8395 0.4012 -1.5736  0.6298\n",
      "country[T.Thailand]   0.4677   1.1586  0.4036 0.6865 -1.8032  2.7385\n",
      "country[T.UK]        -0.6812   0.4275 -1.5932 0.1111 -1.5191  0.1568\n",
      "tobacco_ever[T.Yes]   0.4243   0.2694  1.5750 0.1153 -0.1037  0.9523\n",
      "age_group             0.6168   0.1244  4.9594 0.0000  0.3730  0.8606\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.010249   0.060289  0.024858  3.003636e-16    8.380144e-14\n",
      "sex[T.Male]           0.534654   1.524229  0.902738  7.018188e-01    1.000000e+00\n",
      "country[T.Brazil]     0.600891   2.650256  1.261949  5.388485e-01    1.000000e+00\n",
      "country[T.Canada]     0.192948   1.441634  0.527409  2.123920e-01    1.000000e+00\n",
      "country[T.Japan]      0.449657   3.306817  1.219399  6.967587e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.442738   6.540428  1.701674  4.390034e-01    1.000000e+00\n",
      "country[T.Poland]     0.095771   6.782426  0.805954  8.426530e-01    1.000000e+00\n",
      "country[T.Romania]    0.055667   1.058494  0.242742  5.952396e-02    1.000000e+00\n",
      "country[T.Russia]     0.129612   0.814824  0.324979  1.654771e-02    1.000000e+00\n",
      "country[T.Serbia]     0.207306   1.877208  0.623824  4.011752e-01    1.000000e+00\n",
      "country[T.Thailand]   0.164774  15.463515  1.596243  6.864825e-01    1.000000e+00\n",
      "country[T.UK]         0.218902   1.169773  0.506030  1.111166e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.901474   2.591741  1.528524  1.152672e-01    1.000000e+00\n",
      "age_group             1.452148   2.364508  1.853002  7.070953e-07    1.972796e-04\n",
      "Running logistic regression with parameter sex, signature SBS12\n",
      "** Warning: Covariate country, sig SBS12, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS12: 916\n",
      "All counts for signature SBS12: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.465071    2.524265    1.070095  7.940261e-01    1.000000e+00\n",
      "country[T.Brazil]      0.494468    9.464414    2.254001  2.615671e-01    1.000000e+00\n",
      "country[T.Canada]      1.311769   18.515799    4.800254  1.858320e-02    1.000000e+00\n",
      "country[T.Japan]      46.327813  528.814534  140.847779  1.993291e-24    5.561282e-22\n",
      "country[T.Lithuania]   0.011910   16.231686    1.607199  6.665923e-01    1.000000e+00\n",
      "country[T.Poland]      0.018979   26.673070    2.573986  5.215870e-01    1.000000e+00\n",
      "country[T.Romania]     0.370223   10.016911    2.165140  3.378470e-01    1.000000e+00\n",
      "country[T.Russia]      0.122448    3.327164    0.717194  6.233952e-01    1.000000e+00\n",
      "country[T.Serbia]      0.135009    7.588342    1.366899  6.706650e-01    1.000000e+00\n",
      "country[T.Thailand]    0.029118   49.255718    4.096510  4.073014e-01    1.000000e+00\n",
      "country[T.UK]          0.220563    5.914847    1.284845  6.840423e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.309222    1.655433    0.723576  4.165609e-01    1.000000e+00\n",
      "age_group              0.888536    1.747332    1.238523  2.009172e-01    1.000000e+00\n",
      "Intercept              0.002885    0.044899    0.012657  2.796214e-16    7.801438e-14\n",
      "Running logistic regression with parameter sex, signature SBS13\n",
      "** Warning: Covariate country, sig SBS13, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS13: 807\n",
      "All counts for signature SBS13: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.127287  2.462790  1.655735  9.794606e-03    1.000000e+00\n",
      "country[T.Brazil]     0.418998  1.593755  0.839764  5.878339e-01    1.000000e+00\n",
      "country[T.Canada]     0.392739  1.650584  0.832803  6.096553e-01    1.000000e+00\n",
      "country[T.Japan]      0.115561  1.246694  0.442444  1.301198e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.343460  4.304409  1.381843  6.016801e-01    1.000000e+00\n",
      "country[T.Poland]     0.001402  1.417840  0.180827  1.218343e-01    1.000000e+00\n",
      "country[T.Romania]    0.039471  0.602227  0.195700  2.395126e-03    6.682403e-01\n",
      "country[T.Russia]     0.945758  2.429837  1.513600  8.343334e-02    1.000000e+00\n",
      "country[T.Serbia]     0.311937  1.552226  0.732883  4.265373e-01    1.000000e+00\n",
      "country[T.Thailand]   0.004099  5.088425  0.545066  6.443175e-01    1.000000e+00\n",
      "country[T.UK]         0.675298  2.080448  1.196862  5.236542e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.836646  1.749752  1.208187  3.080497e-01    1.000000e+00\n",
      "age_group             0.912975  1.257242  1.070513  3.967870e-01    1.000000e+00\n",
      "Intercept             0.063442  0.205292  0.115804  5.122414e-15    1.429154e-12\n",
      "Running logistic regression with parameter sex, signature SBS18\n",
      "** Warning: Covariate country, sig SBS18, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS18: 890\n",
      "All counts for signature SBS18: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.032579   3.080587  1.753911  3.653721e-02    1.000000e+00\n",
      "country[T.Brazil]     0.394458   2.113555  0.953939  7.828960e-01    1.000000e+00\n",
      "country[T.Canada]     0.206497   1.769045  0.673768  4.435272e-01    1.000000e+00\n",
      "country[T.Japan]      0.151010   2.572015  0.775389  6.623496e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.375509   7.239441  2.005130  3.513936e-01    1.000000e+00\n",
      "country[T.Poland]     0.119365   5.167568  1.139520  7.961894e-01    1.000000e+00\n",
      "country[T.Romania]    0.327200   2.382710  0.958571  7.943421e-01    1.000000e+00\n",
      "country[T.Russia]     0.294839   1.231669  0.613787  1.660166e-01    1.000000e+00\n",
      "country[T.Serbia]     0.282644   2.070118  0.830477  6.614248e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009301  11.929475  1.243455  7.727888e-01    1.000000e+00\n",
      "country[T.UK]         0.350881   1.850024  0.842125  6.291397e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.453154   1.236685  0.750254  2.494328e-01    1.000000e+00\n",
      "age_group             0.668749   1.027963  0.829600  8.462441e-02    1.000000e+00\n",
      "Intercept             0.052823   0.235230  0.114568  3.080887e-10    8.595675e-08\n",
      "Running logistic regression with parameter sex, signature SBS21\n",
      "** Warning: Covariate country, sig SBS21, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS21: 956\n",
      "All counts for signature SBS21: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.171399      5.234059   0.884011  5.874751e-01        1.000000\n",
      "country[T.Brazil]     0.013726    474.702256   2.552817  3.773421e-01        1.000000\n",
      "country[T.Canada]     0.017881    621.014926   3.332013  5.517960e-01        1.000000\n",
      "country[T.Japan]      0.039746   1393.834621   7.442002  2.583965e-01        1.000000\n",
      "country[T.Lithuania]  0.090397   3227.197637  17.071964  1.629245e-01        1.000000\n",
      "country[T.Poland]     0.077872   2797.072690  14.762668  1.812838e-01        1.000000\n",
      "country[T.Romania]    1.720567   3002.094610  21.612695  1.135188e-02        1.000000\n",
      "country[T.Russia]     0.159592    456.700551   3.087434  3.252014e-01        1.000000\n",
      "country[T.Serbia]     0.017082    596.315789   3.191987  3.839307e-01        1.000000\n",
      "country[T.Thailand]   0.295495  12443.897451  60.439229  8.020075e-02        1.000000\n",
      "country[T.UK]         0.929098   1625.646851  11.703370  3.946253e-02        1.000000\n",
      "tobacco_ever[T.Yes]   0.256798      8.344817   1.351663  5.023740e-01        1.000000\n",
      "age_group             0.316170      1.486701   0.695730  2.719397e-01        1.000000\n",
      "Intercept             0.000025      0.054072   0.004006  1.798504e-08        0.000005\n",
      "Running logistic regression with parameter sex, signature SBS22\n",
      "** Warning: Covariate country, sig SBS22, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS22: 890\n",
      "All counts for signature SBS22: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                            2.5%        97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]             0.288149     1.254628    0.605967  1.665145e-01    1.000000e+00\n",
      "country[T.Brazil]       1.161096    76.379659    7.267801  3.169420e-02    1.000000e+00\n",
      "country[T.Canada]       0.844084    73.194481    6.543899  6.962839e-02    1.000000e+00\n",
      "country[T.Japan]        0.014619    42.311241    2.168091  5.172770e-01    1.000000e+00\n",
      "country[T.Lithuania]    0.027413    82.647569    4.115478  3.737048e-01    1.000000e+00\n",
      "country[T.Poland]       0.061841   195.494053    9.407364  2.245085e-01    1.000000e+00\n",
      "country[T.Romania]    128.038297  5176.267656  547.352269  6.129645e-40    1.710171e-37\n",
      "country[T.Russia]       0.003760    10.545667    0.552633  6.127877e-01    1.000000e+00\n",
      "country[T.Serbia]      18.195928   737.528686   78.406690  8.139541e-13    2.270932e-10\n",
      "country[T.Thailand]    18.368925  2532.473535  169.445899  9.379369e-06    2.616844e-03\n",
      "country[T.UK]           0.174731    27.511843    2.192677  4.348079e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]     0.456179     2.069096    0.966920  7.098424e-01    1.000000e+00\n",
      "age_group               1.418144     2.808220    1.963569  2.440326e-05    6.808509e-03\n",
      "Intercept               0.000132     0.007934    0.001449  1.971007e-27    5.499109e-25\n",
      "Running logistic regression with parameter sex, signature SBS44\n",
      "** Warning: Covariate country, sig SBS44, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS44: 955\n",
      "All counts for signature SBS44: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.245220      6.202517   1.106275  5.894203e-01    1.000000e+00\n",
      "country[T.Brazil]     0.435796   1223.163842   8.319516  1.066432e-01    1.000000e+00\n",
      "country[T.Canada]     0.016995    587.167274   3.158867  5.702954e-01    1.000000e+00\n",
      "country[T.Japan]      0.036531   1277.975854   6.832235  2.733098e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.093085   3313.692073  17.552693  1.587576e-01    1.000000e+00\n",
      "country[T.Poland]     0.079940   2863.346515  15.132694  1.793597e-01    1.000000e+00\n",
      "country[T.Romania]    1.792936   3129.350824  22.524720  1.058021e-02    1.000000e+00\n",
      "country[T.Russia]     0.175472    500.611598   3.388023  3.080005e-01    1.000000e+00\n",
      "country[T.Serbia]     0.018077    630.899722   3.377359  3.863663e-01    1.000000e+00\n",
      "country[T.Thailand]   0.333037  13683.059176  67.110527  7.535508e-02    1.000000e+00\n",
      "country[T.UK]         0.906548   1583.909586  11.404388  4.196309e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.388738     10.214761   1.781457  3.406797e-01    1.000000e+00\n",
      "age_group             0.365681      1.498940   0.747648  3.118027e-01    1.000000e+00\n",
      "Intercept             0.000016      0.034502   0.002519  4.743965e-10    1.323566e-07\n",
      "Running logistic regression with parameter sex, signature SBS1536A\n",
      "** Warning: Covariate country, sig SBS1536A, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536A: 481\n",
      "All counts for signature SBS1536A: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.345606  2.466747  1.818567  9.344643e-05    2.607156e-02\n",
      "country[T.Brazil]     0.179310  0.508878  0.303622  5.391794e-06    1.504310e-03\n",
      "country[T.Canada]     0.154877  0.489634  0.276796  9.713678e-06    2.710116e-03\n",
      "country[T.Japan]      0.026821  0.157798  0.067983  4.298529e-11    1.199290e-08\n",
      "country[T.Lithuania]  0.379517  4.021723  1.137340  8.201213e-01    1.000000e+00\n",
      "country[T.Poland]     0.304948  3.450321  0.990095  9.246937e-01    1.000000e+00\n",
      "country[T.Romania]    0.207614  0.694228  0.380429  1.672504e-03    4.666286e-01\n",
      "country[T.Russia]     0.207702  0.470869  0.313787  1.610551e-08    4.493438e-06\n",
      "country[T.Serbia]     0.277056  0.890186  0.497588  1.872183e-02    1.000000e+00\n",
      "country[T.Thailand]   0.000200  0.255256  0.026803  5.411766e-04    1.509883e-01\n",
      "country[T.UK]         0.274757  0.726662  0.447403  1.160838e-03    3.238738e-01\n",
      "tobacco_ever[T.Yes]   0.813475  1.474020  1.094876  5.416542e-01    1.000000e+00\n",
      "age_group             1.762183  2.326847  2.019679  7.781277e-27    2.170976e-24\n",
      "Intercept             0.233780  0.584918  0.371289  1.724335e-05    4.810895e-03\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running logistic regression with parameter sex, signature SBS1536B\n",
      "Zero counts for signature SBS1536B: 481\n",
      "All counts for signature SBS1536B: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1266.0601 \n",
      "Link Function:         Logit             BIC:             -5265.9117\n",
      "Dependent Variable:    SBS1536B_bool     Log-Likelihood:  -619.03   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1238.1    \n",
      "Df Model:              13                Pearson chi2:    961.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.1719   0.2241 -5.2287 0.0000 -1.6112 -0.7326\n",
      "sex[T.Male]          -0.0747   0.1449 -0.5154 0.6063 -0.3586  0.2093\n",
      "country[T.Brazil]    -0.6812   0.2610 -2.6097 0.0091 -1.1928 -0.1696\n",
      "country[T.Canada]    -0.5143   0.2802 -1.8359 0.0664 -1.0634  0.0348\n",
      "country[T.Japan]     -0.4021   0.3740 -1.0751 0.2823 -1.1352  0.3310\n",
      "country[T.Lithuania] -0.1763   0.5296 -0.3330 0.7391 -1.2143  0.8616\n",
      "country[T.Poland]    -0.3266   0.6127 -0.5331 0.5940 -1.5275  0.8742\n",
      "country[T.Romania]    0.1992   0.2921  0.6821 0.4952 -0.3732  0.7717\n",
      "country[T.Russia]     0.4117   0.1968  2.0915 0.0365  0.0259  0.7974\n",
      "country[T.Serbia]     0.1821   0.2860  0.6366 0.5244 -0.3784  0.7426\n",
      "country[T.Thailand]  -0.7464   0.9380 -0.7957 0.4262 -2.5848  1.0921\n",
      "country[T.UK]         0.2838   0.2371  1.1970 0.2313 -0.1809  0.7484\n",
      "tobacco_ever[T.Yes]   0.3441   0.1447  2.3781 0.0174  0.0605  0.6277\n",
      "age_group             0.5173   0.0644  8.0272 0.0000  0.3910  0.6436\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.199651  0.480651  0.309778  1.707273e-07    4.763291e-05\n",
      "sex[T.Male]           0.698665  1.232758  0.928055  6.062520e-01    1.000000e+00\n",
      "country[T.Brazil]     0.303357  0.843996  0.505996  9.061854e-03    1.000000e+00\n",
      "country[T.Canada]     0.345266  1.035380  0.597898  6.637794e-02    1.000000e+00\n",
      "country[T.Japan]      0.321356  1.392304  0.668899  2.823246e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.296918  2.366952  0.838327  7.391365e-01    1.000000e+00\n",
      "country[T.Poland]     0.217077  2.397014  0.721344  5.939527e-01    1.000000e+00\n",
      "country[T.Romania]    0.688498  2.163458  1.220466  4.951727e-01    1.000000e+00\n",
      "country[T.Russia]     1.026218  2.219810  1.509308  3.648736e-02    1.000000e+00\n",
      "country[T.Serbia]     0.684922  2.101322  1.199684  5.243755e-01    1.000000e+00\n",
      "country[T.Thailand]   0.075414  2.980403  0.474092  4.262062e-01    1.000000e+00\n",
      "country[T.UK]         0.834529  2.113692  1.328133  2.313177e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   1.062375  1.873292  1.410723  1.740030e-02    1.000000e+00\n",
      "age_group             1.478438  1.903310  1.677476  9.972505e-16    2.782329e-13\n",
      "Running logistic regression with parameter sex, signature SBS1536F\n",
      "** Warning: Covariate country, sig SBS1536F, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536F: 845\n",
      "All counts for signature SBS1536F: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.650125   1.488847  0.980584  9.000461e-01    1.000000e+00\n",
      "country[T.Brazil]     0.730990   3.070220  1.525799  2.512066e-01    1.000000e+00\n",
      "country[T.Canada]     1.451735   5.859918  2.941631  3.142781e-03    8.768358e-01\n",
      "country[T.Japan]      0.601385   4.635639  1.801791  2.695751e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.598493   7.838257  2.448556  1.903225e-01    1.000000e+00\n",
      "country[T.Poland]     0.002879   2.985384  0.372925  4.259788e-01    1.000000e+00\n",
      "country[T.Romania]    0.074851   1.200943  0.376888  1.053765e-01    1.000000e+00\n",
      "country[T.Russia]     0.910607   2.859994  1.604777  1.013274e-01    1.000000e+00\n",
      "country[T.Serbia]     0.280697   2.018363  0.818527  6.686595e-01    1.000000e+00\n",
      "country[T.Thailand]   0.283356  16.955946  2.935053  3.081039e-01    1.000000e+00\n",
      "country[T.UK]         0.770335   3.030079  1.547576  2.130729e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.483592   1.114048  0.735575  1.463890e-01    1.000000e+00\n",
      "age_group             0.820592   1.166748  0.977949  7.909046e-01    1.000000e+00\n",
      "Intercept             0.066269   0.240694  0.128793  9.824822e-12    2.741125e-09\n",
      "Running logistic regression with parameter sex, signature SBS1536I\n",
      "** Warning: Covariate country, sig SBS1536I, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536I: 866\n",
      "All counts for signature SBS1536I: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.455980    1.761877    0.896424  5.191496e-01    1.000000e+00\n",
      "country[T.Brazil]      0.449911    7.811765    2.002186  2.635417e-01    1.000000e+00\n",
      "country[T.Canada]      0.002327    2.808592    0.307878  3.476195e-01    1.000000e+00\n",
      "country[T.Japan]       0.148192    8.165945    1.507134  4.742042e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.385815   23.051682    4.034247  1.617173e-01    1.000000e+00\n",
      "country[T.Poland]      0.017815   25.360794    2.431331  4.164332e-01    1.000000e+00\n",
      "country[T.Romania]    72.011661  655.727301  198.557016  3.109042e-39    8.674227e-37\n",
      "country[T.Russia]      0.050174    2.577508    0.497705  3.257964e-01    1.000000e+00\n",
      "country[T.Serbia]     29.923433  257.454199   80.465260  7.886361e-25    2.200295e-22\n",
      "country[T.Thailand]    7.987582  412.561786   53.436827  9.116935e-05    2.543625e-02\n",
      "country[T.UK]          0.001373    1.647099    0.181438  1.216908e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.578338    2.291137    1.144543  4.767930e-01    1.000000e+00\n",
      "age_group              1.568936    2.968918    2.128238  2.988216e-07    8.337123e-05\n",
      "Intercept              0.000745    0.011855    0.003273  1.557044e-29    4.344153e-27\n",
      "Running logistic regression with parameter sex, signature DBS2\n",
      "Zero counts for signature DBS2: 560\n",
      "All counts for signature DBS2: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1267.8276 \n",
      "Link Function:         Logit             BIC:             -5264.1441\n",
      "Dependent Variable:    DBS2_bool         Log-Likelihood:  -619.91   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -652.90   \n",
      "No. Observations:      961               Deviance:        1239.8    \n",
      "Df Model:              13                Pearson chi2:    961.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.2612   0.2242 -5.6261 0.0000 -1.7006 -0.8219\n",
      "sex[T.Male]           0.3185   0.1454  2.1901 0.0285  0.0335  0.6035\n",
      "country[T.Brazil]    -0.2595   0.2532 -1.0249 0.3054 -0.7559  0.2368\n",
      "country[T.Canada]    -0.4443   0.2776 -1.6003 0.1095 -0.9885  0.0999\n",
      "country[T.Japan]     -0.9747   0.3951 -2.4670 0.0136 -1.7492 -0.2003\n",
      "country[T.Lithuania] -0.0472   0.5301 -0.0891 0.9290 -1.0863  0.9918\n",
      "country[T.Poland]    -0.3297   0.6008 -0.5488 0.5831 -1.5073  0.8478\n",
      "country[T.Romania]    0.4115   0.2888  1.4249 0.1542 -0.1545  0.9776\n",
      "country[T.Russia]    -0.2759   0.1987 -1.3882 0.1651 -0.6653  0.1136\n",
      "country[T.Serbia]     0.3193   0.2810  1.1362 0.2559 -0.2315  0.8701\n",
      "country[T.Thailand]  -0.0545   0.9395 -0.0580 0.9538 -1.8959  1.7870\n",
      "country[T.UK]        -0.2359   0.2332 -1.0114 0.3118 -0.6931  0.2212\n",
      "tobacco_ever[T.Yes]   0.7451   0.1443  5.1644 0.0000  0.4623  1.0279\n",
      "age_group             0.2456   0.0622  3.9511 0.0001  0.1238  0.3674\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.182571  0.439613  0.283303  1.843297e-08        0.000005\n",
      "sex[T.Male]           1.034029  1.828499  1.375035  2.851870e-02        1.000000\n",
      "country[T.Brazil]     0.469585  1.267200  0.771400  3.054220e-01        1.000000\n",
      "country[T.Canada]     0.372131  1.105013  0.641256  1.095291e-01        1.000000\n",
      "country[T.Japan]      0.173921  0.818467  0.377291  1.362659e-02        1.000000\n",
      "country[T.Lithuania]  0.337469  2.696150  0.953870  9.290140e-01        1.000000\n",
      "country[T.Poland]     0.221502  2.334587  0.719108  5.831244e-01        1.000000\n",
      "country[T.Romania]    0.856818  2.658029  1.509121  1.541841e-01        1.000000\n",
      "country[T.Russia]     0.514095  1.120337  0.758920  1.650877e-01        1.000000\n",
      "country[T.Serbia]     0.793350  2.387116  1.376161  2.558686e-01        1.000000\n",
      "country[T.Thailand]   0.150188  5.971288  0.947004  9.537823e-01        1.000000\n",
      "country[T.UK]         0.500047  1.247629  0.789856  3.118220e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.587752  2.795076  2.106630  2.411644e-07        0.000067\n",
      "age_group             1.131734  1.443946  1.278344  7.779611e-05        0.021705\n",
      "Running logistic regression with parameter sex, signature DBS4\n",
      "** Warning: Covariate country, sig DBS4, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS4: 872\n",
      "All counts for signature DBS4: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.649549  1.667829  1.035505  8.324531e-01    1.000000e+00\n",
      "country[T.Brazil]     0.303536  1.526864  0.715680  3.916557e-01    1.000000e+00\n",
      "country[T.Canada]     0.379066  1.956257  0.903695  7.992965e-01    1.000000e+00\n",
      "country[T.Japan]      0.090128  1.489515  0.458738  2.107495e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.993862  9.126831  3.176710  5.029639e-02    1.000000e+00\n",
      "country[T.Poland]     0.104032  4.368183  0.983992  8.503749e-01    1.000000e+00\n",
      "country[T.Romania]    0.156817  1.286745  0.504041  1.593594e-01    1.000000e+00\n",
      "country[T.Russia]     0.427891  1.460657  0.798890  4.589157e-01    1.000000e+00\n",
      "country[T.Serbia]     0.058474  0.915967  0.292268  3.244994e-02    1.000000e+00\n",
      "country[T.Thailand]   0.004005  4.998769  0.533547  6.295572e-01    1.000000e+00\n",
      "country[T.UK]         0.228764  1.142442  0.537991  1.081504e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.605964  1.551418  0.969476  8.146180e-01    1.000000e+00\n",
      "age_group             1.068929  1.612481  1.308855  8.824145e-03    1.000000e+00\n",
      "Intercept             0.037209  0.158745  0.078770  1.267446e-14    3.536175e-12\n",
      "Running logistic regression with parameter sex, signature DBS9\n",
      "** Warning: Covariate country, sig DBS9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS9: 926\n",
      "All counts for signature DBS9: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.749738   3.371281  1.547963  2.341993e-01    1.000000e+00\n",
      "country[T.Brazil]     0.326835   3.238345  1.119427  7.355736e-01    1.000000e+00\n",
      "country[T.Canada]     0.396630   4.001047  1.369066  5.874001e-01    1.000000e+00\n",
      "country[T.Japan]      0.002089   2.189339  0.271052  2.637499e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.228317  10.339700  2.204904  3.976216e-01    1.000000e+00\n",
      "country[T.Poland]     0.006551   7.346715  0.858920  7.718383e-01    1.000000e+00\n",
      "country[T.Romania]    0.001286   1.304205  0.165939  9.694659e-02    1.000000e+00\n",
      "country[T.Russia]     0.486975   2.901014  1.193206  6.497223e-01    1.000000e+00\n",
      "country[T.Serbia]     0.308079   3.928812  1.244799  6.756603e-01    1.000000e+00\n",
      "country[T.Thailand]   0.015996  21.869489  2.163723  5.914256e-01    1.000000e+00\n",
      "country[T.UK]         0.090800   1.646073  0.473951  2.451247e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.462226   1.904089  0.938001  7.321785e-01    1.000000e+00\n",
      "age_group             0.813545   1.504158  1.102133  5.018870e-01    1.000000e+00\n",
      "Intercept             0.009131   0.085815  0.029856  6.065105e-14    1.692164e-11\n",
      "Running logistic regression with parameter sex, signature DBS78C\n",
      "** Warning: Covariate country, sig DBS78C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78C: 868\n",
      "All counts for signature DBS78C: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.944471  2.463965  1.509243  8.530063e-02    1.000000e+00\n",
      "country[T.Brazil]     0.469791  2.311767  1.082165  8.080540e-01    1.000000e+00\n",
      "country[T.Canada]     0.917660  3.943163  1.936455  8.141775e-02    1.000000e+00\n",
      "country[T.Japan]      0.111133  1.876989  0.569543  3.820167e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.100050  4.063697  0.938064  8.708911e-01    1.000000e+00\n",
      "country[T.Poland]     0.002811  2.924819  0.364332  4.121034e-01    1.000000e+00\n",
      "country[T.Romania]    0.290054  2.082964  0.845629  7.148150e-01    1.000000e+00\n",
      "country[T.Russia]     0.481277  1.786985  0.936193  8.202276e-01    1.000000e+00\n",
      "country[T.Serbia]     0.396915  2.528706  1.070058  8.515292e-01    1.000000e+00\n",
      "country[T.Thailand]   0.007001  8.821741  0.933259  8.831526e-01    1.000000e+00\n",
      "country[T.UK]         0.679407  2.659092  1.363132  3.711935e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.741667  1.838999  1.164592  4.967608e-01    1.000000e+00\n",
      "age_group             0.975353  1.446585  1.185350  8.694366e-02    1.000000e+00\n",
      "Intercept             0.024806  0.108780  0.053308  4.389740e-19    1.224737e-16\n",
      "Running logistic regression with parameter sex, signature DBS78D\n",
      "** Warning: Covariate country, sig DBS78D, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78D: 903\n",
      "All counts for signature DBS78D: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.420844    1.508620   0.796372  4.455390e-01    1.000000e+00\n",
      "country[T.Brazil]      0.080349    4.073035   0.793587  6.771181e-01    1.000000e+00\n",
      "country[T.Canada]      0.302592    7.428085   1.726409  4.916779e-01    1.000000e+00\n",
      "country[T.Japan]       0.184085    9.882678   1.854295  4.801972e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.374169   21.591498   3.863117  1.970795e-01    1.000000e+00\n",
      "country[T.Poland]      0.739540   45.213474   7.766369  7.263108e-02    1.000000e+00\n",
      "country[T.Romania]    17.067865  127.592144  42.809054  9.416859e-20    2.627304e-17\n",
      "country[T.Russia]      0.311856    4.343047   1.193444  6.700521e-01    1.000000e+00\n",
      "country[T.Serbia]      3.926758   35.243258  11.100082  5.281057e-06    1.473415e-03\n",
      "country[T.Thailand]    0.019338   31.128863   2.698326  5.051161e-01    1.000000e+00\n",
      "country[T.UK]          0.181128    4.372493   1.026747  7.443558e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.413415    1.558633   0.804893  4.666517e-01    1.000000e+00\n",
      "age_group              1.172996    2.079095   1.548845  1.731261e-03    4.830217e-01\n",
      "Intercept              0.002633    0.030726   0.009823  8.827208e-23    2.462791e-20\n",
      "Running logistic regression with parameter sex, signature ID1\n",
      "Zero counts for signature ID1: 481\n",
      "All counts for signature ID1: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1311.7455 \n",
      "Link Function:         Logit             BIC:             -5220.2262\n",
      "Dependent Variable:    ID1_bool          Log-Likelihood:  -641.87   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1283.7    \n",
      "Df Model:              13                Pearson chi2:    963.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.4605   0.2122 -2.1703 0.0300 -0.8763 -0.0446\n",
      "sex[T.Male]           0.1691   0.1417  1.1936 0.2326 -0.1086  0.4468\n",
      "country[T.Brazil]     0.1099   0.2432  0.4518 0.6514 -0.3667  0.5864\n",
      "country[T.Canada]     0.4379   0.2753  1.5906 0.1117 -0.1017  0.9776\n",
      "country[T.Japan]      0.0259   0.3642  0.0711 0.9433 -0.6879  0.7397\n",
      "country[T.Lithuania] -0.9043   0.5605 -1.6133 0.1067 -2.0028  0.1943\n",
      "country[T.Poland]    -1.5560   0.7837 -1.9855 0.0471 -3.0921 -0.0200\n",
      "country[T.Romania]   -0.4125   0.2872 -1.4365 0.1509 -0.9753  0.1503\n",
      "country[T.Russia]     0.2438   0.1915  1.2731 0.2030 -0.1316  0.6192\n",
      "country[T.Serbia]    -0.3541   0.2833 -1.2497 0.2114 -0.9094  0.2012\n",
      "country[T.Thailand]   1.1562   1.1321  1.0213 0.3071 -1.0627  3.3750\n",
      "country[T.UK]        -0.1919   0.2285 -0.8397 0.4011 -0.6398  0.2560\n",
      "tobacco_ever[T.Yes]  -0.3293   0.1405 -2.3440 0.0191 -0.6046 -0.0539\n",
      "age_group             0.2641   0.0603  4.3832 0.0000  0.1460  0.3823\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "Intercept             0.416317   0.956354  0.630989  0.029984        1.000000\n",
      "sex[T.Male]           0.897117   1.563251  1.184238  0.232629        1.000000\n",
      "country[T.Brazil]     0.692995   1.797570  1.116113  0.651436        1.000000\n",
      "country[T.Canada]     0.903317   2.657947  1.549506  0.111690        1.000000\n",
      "country[T.Japan]      0.502629   2.095352  1.026248  0.943286        1.000000\n",
      "country[T.Lithuania]  0.134958   1.214431  0.404842  0.106670        1.000000\n",
      "country[T.Poland]     0.045406   0.980210  0.210969  0.047092        1.000000\n",
      "country[T.Romania]    0.377069   1.162201  0.661990  0.150858        1.000000\n",
      "country[T.Russia]     0.876731   1.857454  1.276122  0.202990        1.000000\n",
      "country[T.Serbia]     0.402782   1.222912  0.701831  0.211413        1.000000\n",
      "country[T.Thailand]   0.345534  29.223537  3.177690  0.307125        1.000000\n",
      "country[T.UK]         0.527411   1.291742  0.825396  0.401060        1.000000\n",
      "tobacco_ever[T.Yes]   0.546296   0.947484  0.719448  0.019079        1.000000\n",
      "age_group             1.157230   1.465580  1.302310  0.000012        0.003263\n",
      "Running logistic regression with parameter sex, signature ID2\n",
      "** Warning: Covariate country, sig ID2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID2: 945\n",
      "All counts for signature ID2: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.374218   3.023756  1.029775  5.832641e-01        1.000000\n",
      "country[T.Brazil]     0.001731   2.056395  0.228403  1.739022e-01        1.000000\n",
      "country[T.Canada]     0.002556   3.068373  0.337809  3.919891e-01        1.000000\n",
      "country[T.Japan]      0.637901  16.462365  3.711590  1.057310e-01        1.000000\n",
      "country[T.Lithuania]  0.010753  13.726040  1.437142  5.163704e-01        1.000000\n",
      "country[T.Poland]     0.012237  16.077832  1.643945  5.033930e-01        1.000000\n",
      "country[T.Romania]    0.324377   7.820795  1.837371  3.283197e-01        1.000000\n",
      "country[T.Russia]     0.225203   3.170622  0.866209  5.396545e-01        1.000000\n",
      "country[T.Serbia]     0.002397   2.872017  0.316777  2.805266e-01        1.000000\n",
      "country[T.Thailand]   0.029905  49.227238  4.189119  3.224307e-01        1.000000\n",
      "country[T.UK]         0.347715   5.851776  1.525801  3.875624e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.252441   2.027974  0.724593  3.989168e-01        1.000000\n",
      "age_group             0.557156   1.322096  0.856259  3.622407e-01        1.000000\n",
      "Intercept             0.007881   0.130438  0.036090  5.625232e-09        0.000002\n",
      "Running logistic regression with parameter sex, signature ID3\n",
      "** Warning: Covariate country, sig ID3, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID3: 920\n",
      "All counts for signature ID3: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.457623   1.771572  0.890208  6.595917e-01    1.000000e+00\n",
      "country[T.Brazil]     0.246893   3.268034  1.007879  7.330826e-01    1.000000e+00\n",
      "country[T.Canada]     0.156369   3.047160  0.834773  7.872298e-01    1.000000e+00\n",
      "country[T.Japan]      0.309735   6.343414  1.684785  4.578241e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.005909   6.650686  0.775214  7.154907e-01    1.000000e+00\n",
      "country[T.Poland]     0.302147  14.969733  2.995547  2.671726e-01    1.000000e+00\n",
      "country[T.Romania]    2.329268  14.712246  5.784115  1.978430e-04    5.519820e-02\n",
      "country[T.Russia]     0.309987   2.541188  0.913985  7.302768e-01    1.000000e+00\n",
      "country[T.Serbia]     0.065364   2.778826  0.619049  5.185249e-01    1.000000e+00\n",
      "country[T.Thailand]   0.818659  56.997385  8.897890  6.399355e-02    1.000000e+00\n",
      "country[T.UK]         0.379409   3.447739  1.200193  6.300751e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.837451   3.309871  1.643232  1.397033e-01    1.000000e+00\n",
      "age_group             0.843451   1.505120  1.122191  4.035171e-01    1.000000e+00\n",
      "Intercept             0.008141   0.069129  0.025179  9.901916e-17    2.762635e-14\n",
      "Running logistic regression with parameter sex, signature ID5\n",
      "Zero counts for signature ID5: 481\n",
      "All counts for signature ID5: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1146.5794 \n",
      "Link Function:         Logit             BIC:             -5385.3923\n",
      "Dependent Variable:    ID5_bool          Log-Likelihood:  -559.29   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1118.6    \n",
      "Df Model:              13                Pearson chi2:    964.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.4654   0.2410 -6.0807 0.0000 -1.9378 -0.9931\n",
      "sex[T.Male]           0.5113   0.1562  3.2737 0.0011  0.2052  0.8174\n",
      "country[T.Brazil]    -0.9738   0.2698 -3.6096 0.0003 -1.5026 -0.4451\n",
      "country[T.Canada]    -0.8139   0.2948 -2.7609 0.0058 -1.3917 -0.2361\n",
      "country[T.Japan]     -2.7842   0.5000 -5.5679 0.0000 -3.7643 -1.8041\n",
      "country[T.Lithuania] -0.0867   0.5632 -0.1539 0.8777 -1.1905  1.0172\n",
      "country[T.Poland]    -0.9973   0.6647 -1.5003 0.1335 -2.3002  0.3056\n",
      "country[T.Romania]    0.3524   0.3281  1.0739 0.2828 -0.2907  0.9954\n",
      "country[T.Russia]    -0.4554   0.2063 -2.2071 0.0273 -0.8598 -0.0510\n",
      "country[T.Serbia]    -0.0649   0.3026 -0.2144 0.8303 -0.6580  0.5282\n",
      "country[T.Thailand]  -2.3628   1.1588 -2.0391 0.0414 -4.6339 -0.0917\n",
      "country[T.UK]        -0.3619   0.2480 -1.4594 0.1445 -0.8479  0.1241\n",
      "tobacco_ever[T.Yes]  -0.0790   0.1531 -0.5160 0.6058 -0.3790  0.2211\n",
      "age_group             0.8113   0.0734 11.0519 0.0000  0.6674  0.9552\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.144023  0.370432  0.230977  1.196802e-09    3.339077e-07\n",
      "sex[T.Male]           1.227731  2.264541  1.667407  1.061676e-03    2.962077e-01\n",
      "country[T.Brazil]     0.222549  0.640784  0.377632  3.066150e-04    8.554558e-02\n",
      "country[T.Canada]     0.248641  0.789682  0.443111  5.763978e-03    1.000000e+00\n",
      "country[T.Japan]      0.023184  0.164618  0.061778  2.578485e-08    7.193972e-06\n",
      "country[T.Lithuania]  0.304065  2.765335  0.916974  8.776876e-01    1.000000e+00\n",
      "country[T.Poland]     0.100244  1.357378  0.368875  1.335371e-01    1.000000e+00\n",
      "country[T.Romania]    0.747740  2.705839  1.422415  2.828483e-01    1.000000e+00\n",
      "country[T.Russia]     0.423255  0.950296  0.634207  2.731048e-02    1.000000e+00\n",
      "country[T.Serbia]     0.517907  1.695914  0.937190  8.302578e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009717  0.912377  0.094155  4.143976e-02    1.000000e+00\n",
      "country[T.UK]         0.428304  1.132166  0.696356  1.444598e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.684513  1.247392  0.924044  6.058474e-01    1.000000e+00\n",
      "age_group             1.949190  2.599096  2.250807  2.145371e-28    5.985584e-26\n",
      "Running logistic regression with parameter sex, signature ID8\n",
      "Zero counts for signature ID8: 260\n",
      "All counts for signature ID8: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1055.1280 \n",
      "Link Function:         Logit             BIC:             -5476.8438\n",
      "Dependent Variable:    ID8_bool          Log-Likelihood:  -513.56   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -561.04   \n",
      "No. Observations:      961               Deviance:        1027.1    \n",
      "Df Model:              13                Pearson chi2:    956.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept             0.0689   0.2399  0.2872 0.7739 -0.4013  0.5391\n",
      "sex[T.Male]           0.4162   0.1614  2.5777 0.0099  0.0997  0.7326\n",
      "country[T.Brazil]    -0.7763   0.2738 -2.8354 0.0046 -1.3130 -0.2397\n",
      "country[T.Canada]    -0.2465   0.3312 -0.7443 0.4567 -0.8956  0.4026\n",
      "country[T.Japan]     -1.5096   0.3958 -3.8145 0.0001 -2.2853 -0.7340\n",
      "country[T.Lithuania]  0.4631   0.7839  0.5908 0.5547 -1.0733  1.9996\n",
      "country[T.Poland]    -0.2932   0.6458 -0.4540 0.6498 -1.5589  0.9725\n",
      "country[T.Romania]   -0.6716   0.3264 -2.0574 0.0396 -1.3113 -0.0318\n",
      "country[T.Russia]    -0.1172   0.2280 -0.5140 0.6072 -0.5642  0.3297\n",
      "country[T.Serbia]    -0.7006   0.3075 -2.2788 0.0227 -1.3033 -0.0980\n",
      "country[T.Thailand]  -1.2885   0.9449 -1.3636 0.1727 -3.1406  0.5635\n",
      "country[T.UK]        -0.2436   0.2818 -0.8644 0.3874 -0.7960  0.3088\n",
      "tobacco_ever[T.Yes]  -0.0028   0.1613 -0.0176 0.9860 -0.3189  0.3133\n",
      "age_group             0.5556   0.0716  7.7645 0.0000  0.4153  0.6958\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.669452  1.714475  1.071335  7.739414e-01    1.000000e+00\n",
      "sex[T.Male]           1.104877  2.080432  1.516121  9.945063e-03    1.000000e+00\n",
      "country[T.Brazil]     0.269022  0.786869  0.460093  4.576735e-03    1.000000e+00\n",
      "country[T.Canada]     0.408360  1.495734  0.781536  4.567074e-01    1.000000e+00\n",
      "country[T.Japan]      0.101744  0.480008  0.220994  1.364481e-04    3.806901e-02\n",
      "country[T.Lithuania]  0.341862  7.386022  1.589025  5.546745e-01    1.000000e+00\n",
      "country[T.Poland]     0.210375  2.644453  0.745873  6.498000e-01    1.000000e+00\n",
      "country[T.Romania]    0.269459  0.968702  0.510906  3.964978e-02    1.000000e+00\n",
      "country[T.Russia]     0.568825  1.390615  0.889391  6.072450e-01    1.000000e+00\n",
      "country[T.Serbia]     0.271646  0.906629  0.496268  2.268052e-02    1.000000e+00\n",
      "country[T.Thailand]   0.043258  1.756766  0.275672  1.726786e-01    1.000000e+00\n",
      "country[T.UK]         0.451151  1.361729  0.783802  3.873779e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.726928  1.367866  0.997166  9.859598e-01    1.000000e+00\n",
      "age_group             1.514874  2.005347  1.742943  8.199492e-15    2.287658e-12\n",
      "Running logistic regression with parameter sex, signature ID9\n",
      "** Warning: Covariate country, sig ID9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID9: 957\n",
      "All counts for signature ID9: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.185678   12.336978   1.196410  6.612038e-01        1.000000\n",
      "country[T.Brazil]     0.004189    7.302258   0.581571  4.784588e-01        1.000000\n",
      "country[T.Canada]     0.160117   14.016129   1.796904  5.479595e-01        1.000000\n",
      "country[T.Japan]      0.007389   14.299791   1.055337  6.345034e-01        1.000000\n",
      "country[T.Lithuania]  0.022404   42.729347   3.174849  4.062195e-01        1.000000\n",
      "country[T.Poland]     0.025651   50.730402   3.667075  3.548069e-01        1.000000\n",
      "country[T.Romania]    0.005970   10.370154   0.828074  5.406973e-01        1.000000\n",
      "country[T.Russia]     0.002148    4.112552   0.304443  3.377300e-01        1.000000\n",
      "country[T.Serbia]     0.233843   21.847897   2.686228  3.240590e-01        1.000000\n",
      "country[T.Thailand]   0.066857  173.327968  10.085983  2.187983e-01        1.000000\n",
      "country[T.UK]         0.002788    4.949229   0.389226  3.688637e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.308024   23.088732   2.059376  3.443659e-01        1.000000\n",
      "age_group             0.552409    3.278848   1.264666  4.922317e-01        1.000000\n",
      "Intercept             0.000089    0.053370   0.003927  3.322967e-08        0.000009\n",
      "Running logistic regression with parameter sex, signature ID11\n",
      "** Warning: Covariate country, sig ID11, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID11: 953\n",
      "All counts for signature ID11: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.137026   2.422243  0.613014  3.991068e-01         1.00000\n",
      "country[T.Brazil]     0.002927   4.255458  0.397376  4.272206e-01         1.00000\n",
      "country[T.Canada]     0.203576  14.197568  2.181766  4.204162e-01         1.00000\n",
      "country[T.Japan]      0.009958  15.222180  1.365803  5.958346e-01         1.00000\n",
      "country[T.Lithuania]  0.012295  19.378197  1.699896  5.265037e-01         1.00000\n",
      "country[T.Poland]     0.033881  65.409000  4.839885  3.259077e-01         1.00000\n",
      "country[T.Romania]    0.003838   5.501099  0.519624  5.142460e-01         1.00000\n",
      "country[T.Russia]     0.218359   5.569579  1.097867  6.164876e-01         1.00000\n",
      "country[T.Serbia]     0.003971   5.730619  0.538336  4.962380e-01         1.00000\n",
      "country[T.Thailand]   0.021493  43.116130  3.111328  4.082013e-01         1.00000\n",
      "country[T.UK]         0.116350   7.738644  1.226265  6.247734e-01         1.00000\n",
      "tobacco_ever[T.Yes]   0.023986   1.213531  0.235698  7.455495e-02         1.00000\n",
      "age_group             0.679254   2.379662  1.234258  4.093597e-01         1.00000\n",
      "Intercept             0.001768   0.114194  0.018709  2.138700e-07         0.00006\n",
      "Running logistic regression with parameter sex, signature ID12\n",
      "** Warning: Covariate country, sig ID12, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID12: 954\n",
      "All counts for signature ID12: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR   p-value  p-value (corr)\n",
      "sex[T.Male]           0.128605      2.395574   0.576850  0.349191        1.000000\n",
      "country[T.Brazil]     1.333380   1956.675846  14.393065  0.019249        1.000000\n",
      "country[T.Canada]     0.022074    780.105632   4.148480  0.482822        1.000000\n",
      "country[T.Japan]      0.051370   1823.303871   9.672546  0.196098        1.000000\n",
      "country[T.Lithuania]  0.106766   3905.391477  20.388273  0.133156        1.000000\n",
      "country[T.Poland]     0.065907   2454.444739  12.738588  0.178832        1.000000\n",
      "country[T.Romania]    0.020815    725.687006   3.887272  0.245807        1.000000\n",
      "country[T.Russia]     0.529068    756.928969   5.593369  0.131457        1.000000\n",
      "country[T.Serbia]     0.014123    492.815566   2.638875  0.352869        1.000000\n",
      "country[T.Thailand]   0.322065  14731.612912  68.472752  0.066981        1.000000\n",
      "country[T.UK]         0.426776   1197.409658   8.142941  0.099089        1.000000\n",
      "tobacco_ever[T.Yes]   0.117015      2.788629   0.653089  0.450095        1.000000\n",
      "age_group             0.205483      0.910157   0.461979  0.020336        1.000000\n",
      "Intercept             0.000091      0.124469   0.012695  0.000001        0.000389\n",
      "Running logistic regression with parameter sex, signature ID83C\n",
      "** Warning: Covariate country, sig ID83C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID83C: 944\n",
      "All counts for signature ID83C: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                           2.5%         97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.386814      3.584608    1.145584  6.504633e-01    1.000000e+00\n",
      "country[T.Brazil]      0.016332    560.359192    3.025082  4.763738e-01    1.000000e+00\n",
      "country[T.Canada]      0.579863   1630.616888   11.084654  1.043989e-01    1.000000e+00\n",
      "country[T.Japan]       0.033745   1185.169352    6.324505  3.132266e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.076864   2723.726834   14.469838  1.914579e-01    1.000000e+00\n",
      "country[T.Poland]      0.137312   4949.091905   26.048856  1.314863e-01    1.000000e+00\n",
      "country[T.Romania]    16.640395  16612.852435  129.032687  1.952549e-10    5.447613e-08\n",
      "country[T.Russia]      0.007819    271.118071    1.455904  6.151711e-01    1.000000e+00\n",
      "country[T.Serbia]      3.036852   4362.863473   32.198499  2.387989e-03    6.662489e-01\n",
      "country[T.Thailand]    0.176828   6974.318483   35.105375  1.165617e-01    1.000000e+00\n",
      "country[T.UK]          0.012284    422.343117    2.277772  5.064456e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.238367      2.285891    0.754108  4.981554e-01    1.000000e+00\n",
      "age_group              0.923130      2.470521    1.472286  9.588224e-02    1.000000e+00\n",
      "Intercept              0.000006      0.009959    0.000872  4.248561e-18    1.185349e-15\n",
      "Running logistic regression with parameter sex, signature SBS_burden\n",
      "** Warning: Covariate country, sig SBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS_burden: 481\n",
      "All counts for signature SBS_burden: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.424533   2.708518  1.959239  3.148233e-05    8.783569e-03\n",
      "country[T.Brazil]     0.321739   0.952125  0.555417  3.242355e-02    1.000000e+00\n",
      "country[T.Canada]     0.176452   0.596007  0.326817  2.438769e-04    6.804166e-02\n",
      "country[T.Japan]      0.187811   0.972171  0.426904  4.265378e-02    1.000000e+00\n",
      "country[T.Lithuania]  0.285147   2.541897  0.838477  7.416656e-01    1.000000e+00\n",
      "country[T.Poland]     0.193742   2.584661  0.732695  6.190425e-01    1.000000e+00\n",
      "country[T.Romania]    3.496526  19.547025  7.842931  7.955217e-08    2.219506e-05\n",
      "country[T.Russia]     0.487470   1.127575  0.741804  1.619068e-01    1.000000e+00\n",
      "country[T.Serbia]     0.820998   2.823028  1.515669  1.839464e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000263   0.363622  0.035843  2.445103e-03    6.821836e-01\n",
      "country[T.UK]         0.459913   1.252598  0.759114  2.789023e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.842714   1.575609  1.151715  3.679264e-01    1.000000e+00\n",
      "age_group             2.394166   3.292596  2.796220  7.270876e-49    2.028574e-46\n",
      "Intercept             0.056066   0.158684  0.095329  9.447907e-22    2.635966e-19\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running logistic regression with parameter sex, signature DBS_burden\n",
      "** Warning: Covariate country, sig DBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS_burden: 530\n",
      "All counts for signature DBS_burden: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.161888  2.185593  1.590682  3.704111e-03    1.000000e+00\n",
      "country[T.Brazil]     0.379947  1.069320  0.639249  8.832308e-02    1.000000e+00\n",
      "country[T.Canada]     0.230532  0.736796  0.414998  2.588723e-03    7.222538e-01\n",
      "country[T.Japan]      0.091265  0.468360  0.211999  1.001378e-04    2.793843e-02\n",
      "country[T.Lithuania]  0.539177  5.032744  1.558902  4.183740e-01    1.000000e+00\n",
      "country[T.Poland]     0.198378  2.262121  0.694182  5.410311e-01    1.000000e+00\n",
      "country[T.Romania]    1.810707  8.689513  3.819218  2.976646e-04    8.304843e-02\n",
      "country[T.Russia]     0.502772  1.159735  0.764028  2.060447e-01    1.000000e+00\n",
      "country[T.Serbia]     0.747300  2.418484  1.339524  3.265771e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000354  0.454511  0.047439  4.866875e-03    1.000000e+00\n",
      "country[T.UK]         0.622362  1.825153  1.061828  8.208100e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.801407  1.490542  1.092489  5.656693e-01    1.000000e+00\n",
      "age_group             1.784440  2.384883  2.057017  3.323695e-26    9.273110e-24\n",
      "Intercept             0.123892  0.325875  0.202480  1.402747e-11    3.913663e-09\n",
      "Running logistic regression with parameter sex, signature ID_burden\n",
      "Zero counts for signature ID_burden: 482\n",
      "All counts for signature ID_burden: 961\n",
      "Covariates used: ['age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1112.6005 \n",
      "Link Function:         Logit             BIC:             -5419.3712\n",
      "Dependent Variable:    ID_burden_bool    Log-Likelihood:  -542.30   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1084.6    \n",
      "Df Model:              13                Pearson chi2:    959.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.7446   0.2496 -6.9894 0.0000 -2.2338 -1.2554\n",
      "sex[T.Male]           0.6143   0.1602  3.8349 0.0001  0.3003  0.9282\n",
      "country[T.Brazil]    -0.8240   0.2725 -3.0236 0.0025 -1.3582 -0.2899\n",
      "country[T.Canada]    -1.0580   0.3041 -3.4795 0.0005 -1.6539 -0.4620\n",
      "country[T.Japan]     -2.6428   0.4840 -5.4600 0.0000 -3.5915 -1.6941\n",
      "country[T.Lithuania] -0.6290   0.5554 -1.1324 0.2575 -1.7176  0.4596\n",
      "country[T.Poland]    -0.9165   0.6760 -1.3558 0.1752 -2.2415  0.4084\n",
      "country[T.Romania]    0.8116   0.3529  2.3000 0.0214  0.1200  1.5033\n",
      "country[T.Russia]    -0.3323   0.2098 -1.5839 0.1132 -0.7435  0.0789\n",
      "country[T.Serbia]    -0.1166   0.3088 -0.3776 0.7057 -0.7218  0.4886\n",
      "country[T.Thailand]  -2.3970   1.1676 -2.0529 0.0401 -4.6856 -0.1085\n",
      "country[T.UK]        -0.2728   0.2526 -1.0801 0.2801 -0.7679  0.2223\n",
      "tobacco_ever[T.Yes]  -0.1641   0.1560 -1.0517 0.2929 -0.4698  0.1417\n",
      "age_group             0.9136   0.0770 11.8696 0.0000  0.7627  1.0644\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.107118  0.284966  0.174715  2.760020e-12    7.700455e-10\n",
      "sex[T.Male]           1.350310  2.530076  1.848347  1.256255e-04    3.504950e-02\n",
      "country[T.Brazil]     0.257129  0.748353  0.438661  2.497618e-03    6.968353e-01\n",
      "country[T.Canada]     0.191296  0.630008  0.347157  5.024391e-04    1.401805e-01\n",
      "country[T.Japan]      0.027557  0.183759  0.071161  4.761125e-08    1.328354e-05\n",
      "country[T.Lithuania]  0.179494  1.583516  0.533134  2.574595e-01    1.000000e+00\n",
      "country[T.Poland]     0.106297  1.504476  0.399901  1.751699e-01    1.000000e+00\n",
      "country[T.Romania]    1.127479  4.496293  2.251549  2.144958e-02    1.000000e+00\n",
      "country[T.Russia]     0.475434  1.082103  0.717265  1.132222e-01    1.000000e+00\n",
      "country[T.Serbia]     0.485887  1.630006  0.889943  7.057153e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009227  0.897172  0.090986  4.008294e-02    1.000000e+00\n",
      "country[T.UK]         0.463986  1.248885  0.761226  2.801020e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.625135  1.152204  0.848695  2.929305e-01    1.000000e+00\n",
      "age_group             2.144108  2.899188  2.493225  1.701868e-32    4.748212e-30\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using below/above median model for signature SBS1, its frequency is 0.76\n",
      "Using below/above median model for signature SBS1536A, its frequency is 0.87\n",
      "Using below/above median model for signature SBS1536B, its frequency is 0.90\n",
      "Using below/above median model for signature ID1, its frequency is 0.84\n",
      "Using below/above median model for signature ID5, its frequency is 0.94\n",
      "Running logistic regression with parameter stage, signature SBS1\n",
      "Zero counts for signature SBS1: 432\n",
      "All counts for signature SBS1: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1083.9569 \n",
      "Link Function:         Logit             BIC:             -4683.6352\n",
      "Dependent Variable:    SBS1_bool         Log-Likelihood:  -527.98   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -598.19   \n",
      "No. Observations:      863               Deviance:        1056.0    \n",
      "Df Model:              13                Pearson chi2:    863.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.2660   0.2486 -5.0922 0.0000 -1.7533 -0.7787\n",
      "sex[T.Male]          -0.1803   0.1598 -1.1282 0.2592 -0.4936  0.1329\n",
      "country[T.Brazil]     0.1941   0.3254  0.5965 0.5508 -0.4437  0.8320\n",
      "country[T.Canada]     0.6978   0.4294  1.6250 0.1042 -0.1438  1.5394\n",
      "country[T.Japan]      0.4805   0.3853  1.2469 0.2124 -0.2748  1.2357\n",
      "country[T.Lithuania]  0.6830   0.6399  1.0674 0.2858 -0.5711  1.9371\n",
      "country[T.Poland]    -0.8882   0.6404 -1.3868 0.1655 -2.1434  0.3671\n",
      "country[T.Romania]   -0.9659   0.3235 -2.9858 0.0028 -1.6000 -0.3319\n",
      "country[T.Russia]     0.4003   0.2038  1.9640 0.0495  0.0008  0.7999\n",
      "country[T.Serbia]    -0.8211   0.3155 -2.6021 0.0093 -1.4395 -0.2026\n",
      "country[T.UK]         0.3895   0.2428  1.6042 0.1087 -0.0864  0.8655\n",
      "tobacco_ever[T.Yes]  -0.0381   0.1578 -0.2413 0.8093 -0.3474  0.2712\n",
      "stage                 0.4843   0.0683  7.0949 0.0000  0.3505  0.6181\n",
      "age_group             0.3937   0.0693  5.6839 0.0000  0.2580  0.5295\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.173199  0.458986  0.281950  3.539835e-07    9.876139e-05\n",
      "sex[T.Male]           0.610425  1.142185  0.834996  2.592300e-01    1.000000e+00\n",
      "country[T.Brazil]     0.641640  2.297908  1.214261  5.508281e-01    1.000000e+00\n",
      "country[T.Canada]     0.866030  4.661875  2.009309  1.041623e-01    1.000000e+00\n",
      "country[T.Japan]      0.759748  3.440946  1.616865  2.124290e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.564888  6.938378  1.979750  2.858008e-01    1.000000e+00\n",
      "country[T.Poland]     0.117253  1.443472  0.411401  1.654907e-01    1.000000e+00\n",
      "country[T.Romania]    0.201901  0.717581  0.380631  2.828132e-03    7.890487e-01\n",
      "country[T.Russia]     1.000828  2.225219  1.492334  4.952715e-02    1.000000e+00\n",
      "country[T.Serbia]     0.237039  0.816588  0.439958  9.265608e-03    1.000000e+00\n",
      "country[T.UK]         0.917227  2.376080  1.476281  1.086809e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.706541  1.311547  0.962633  8.092975e-01    1.000000e+00\n",
      "stage                 1.419800  1.855390  1.623047  1.294565e-12    3.611836e-10\n",
      "age_group             1.294293  1.698098  1.482510  1.316838e-08    3.673977e-06\n",
      "Running logistic regression with parameter stage, signature SBS2\n",
      "** Warning: Covariate country, sig SBS2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter stage, sig SBS2, perfect or near-perfect separation for category 0. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS2: 858\n",
      "All counts for signature SBS2: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                              2.5%       97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           2.581705e-01   15.223758  1.482377  4.393894e-01    1.000000e+00\n",
      "country[T.Brazil]     9.195284e-03   27.008579  1.369436  4.195337e-01    1.000000e+00\n",
      "country[T.Canada]     5.957064e-03   18.282863  0.900785  7.052777e-01    1.000000e+00\n",
      "country[T.Japan]      1.203208e-02   43.618340  1.931141  4.467131e-01    1.000000e+00\n",
      "country[T.Lithuania]  3.103245e-02  106.139931  4.849357  2.950676e-01    1.000000e+00\n",
      "country[T.Poland]     3.099131e-02  116.712414  4.920237  2.707379e-01    1.000000e+00\n",
      "country[T.Romania]    1.788175e-02   71.844507  2.837115  3.963580e-01    1.000000e+00\n",
      "country[T.Russia]     2.429930e-01   23.651645  1.994407  3.617198e-01    1.000000e+00\n",
      "country[T.Serbia]     9.314667e-03   27.801055  1.388864  4.510733e-01    1.000000e+00\n",
      "country[T.UK]         4.032347e-01   36.547587  3.211412  2.115521e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   1.782497e-01    6.205478  0.973586  3.422548e-01    1.000000e+00\n",
      "stage                 1.585167e+00   39.401462  4.762401  1.469611e-03    4.100216e-01\n",
      "age_group             5.469591e-01    2.865077  1.198923  4.689886e-01    1.000000e+00\n",
      "Intercept             1.794016e-07    0.007659  0.000174  2.702197e-11    7.539129e-09\n",
      "Running logistic regression with parameter stage, signature SBS4\n",
      "Zero counts for signature SBS4: 364\n",
      "All counts for signature SBS4: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1098.9927 \n",
      "Link Function:         Logit             BIC:             -4668.5993\n",
      "Dependent Variable:    SBS4_bool         Log-Likelihood:  -535.50   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -587.58   \n",
      "No. Observations:      863               Deviance:        1071.0    \n",
      "Df Model:              13                Pearson chi2:    858.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.3996   0.2401 -1.6643 0.0960 -0.8701  0.0710\n",
      "sex[T.Male]           0.0315   0.1575  0.2001 0.8414 -0.2772  0.3403\n",
      "country[T.Brazil]    -0.5472   0.3215 -1.7020 0.0888 -1.1774  0.0829\n",
      "country[T.Canada]    -0.2928   0.3785 -0.7736 0.4392 -1.0347  0.4491\n",
      "country[T.Japan]      0.7949   0.4826  1.6469 0.0996 -0.1511  1.7408\n",
      "country[T.Lithuania] -0.1225   0.6029 -0.2031 0.8391 -1.3042  1.0593\n",
      "country[T.Poland]     1.0027   0.6967  1.4391 0.1501 -0.3629  2.3682\n",
      "country[T.Romania]   -0.9968   0.2988 -3.3362 0.0008 -1.5824 -0.4112\n",
      "country[T.Russia]    -0.2166   0.2021 -1.0718 0.2838 -0.6127  0.1795\n",
      "country[T.Serbia]    -0.4338   0.2890 -1.5012 0.1333 -1.0002  0.1326\n",
      "country[T.UK]         0.0009   0.2488  0.0036 0.9971 -0.4868  0.4886\n",
      "tobacco_ever[T.Yes]   0.7052   0.1565  4.5064 0.0000  0.3985  1.0119\n",
      "stage                -0.2779   0.0667 -4.1691 0.0000 -0.4085 -0.1472\n",
      "age_group             0.4370   0.0691  6.3208 0.0000  0.3015  0.5725\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.418902   1.073550  0.670606  9.604294e-02    1.000000e+00\n",
      "sex[T.Male]           0.757893   1.405302  1.032021  8.414040e-01    1.000000e+00\n",
      "country[T.Brazil]     0.308079   1.086483  0.578552  8.875784e-02    1.000000e+00\n",
      "country[T.Canada]     0.355317   1.566906  0.746156  4.391948e-01    1.000000e+00\n",
      "country[T.Japan]      0.859751   5.702104  2.214133  9.958417e-02    1.000000e+00\n",
      "country[T.Lithuania]  0.271384   2.884333  0.884739  8.390518e-01    1.000000e+00\n",
      "country[T.Poland]     0.695678  10.678019  2.725521  1.501127e-01    1.000000e+00\n",
      "country[T.Romania]    0.205475   0.662846  0.369051  8.491738e-04    2.369195e-01\n",
      "country[T.Russia]     0.541881   1.196614  0.805246  2.838110e-01    1.000000e+00\n",
      "country[T.Serbia]     0.367816   1.141776  0.648046  1.333166e-01    1.000000e+00\n",
      "country[T.UK]         0.614595   1.630019  1.000901  9.971133e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   1.489544   2.750737  2.024190  6.594114e-06    1.839758e-03\n",
      "stage                 0.664639   0.863086  0.757391  3.058558e-05    8.533376e-03\n",
      "age_group             1.351860   1.772664  1.548029  2.602877e-10    7.262028e-08\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running logistic regression with parameter stage, signature SBS5\n",
      "Zero counts for signature SBS5: 791\n",
      "All counts for signature SBS5: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             471.6515  \n",
      "Link Function:         Logit             BIC:             -5295.9405\n",
      "Dependent Variable:    SBS5_bool         Log-Likelihood:  -221.83   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -247.74   \n",
      "No. Observations:      863               Deviance:        443.65    \n",
      "Df Model:              13                Pearson chi2:    848.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -3.3996   0.4666 -7.2861 0.0000 -4.3141 -2.4851\n",
      "sex[T.Male]          -0.1624   0.2792 -0.5817 0.5608 -0.7096  0.3848\n",
      "country[T.Brazil]     0.6212   0.4202  1.4786 0.1393 -0.2023  1.4447\n",
      "country[T.Canada]     0.2527   0.5524  0.4575 0.6473 -0.8300  1.3355\n",
      "country[T.Japan]      0.2125   0.5103  0.4165 0.6770 -0.7876  1.2127\n",
      "country[T.Lithuania]  0.2401   0.8137  0.2950 0.7680 -1.3548  1.8350\n",
      "country[T.Poland]    -0.1535   1.0891 -0.1409 0.8879 -2.2882  1.9812\n",
      "country[T.Romania]   -1.4322   0.7519 -1.9047 0.0568 -2.9060  0.0416\n",
      "country[T.Russia]    -1.1498   0.4724 -2.4340 0.0149 -2.0757 -0.2239\n",
      "country[T.Serbia]    -0.4830   0.5641 -0.8562 0.3919 -1.5886  0.6226\n",
      "country[T.UK]        -0.6430   0.4289 -1.4993 0.1338 -1.4836  0.1975\n",
      "tobacco_ever[T.Yes]   0.3633   0.2788  1.3032 0.1925 -0.1831  0.9098\n",
      "stage                -0.0960   0.1181 -0.8125 0.4165 -0.3275  0.1355\n",
      "age_group             0.5667   0.1277  4.4377 0.0000  0.3164  0.8170\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.013379  0.083319  0.033388  3.191011e-13    8.902921e-11\n",
      "sex[T.Male]           0.491853  1.469298  0.850105  5.607760e-01    1.000000e+00\n",
      "country[T.Brazil]     0.816883  4.240695  1.861223  1.392541e-01    1.000000e+00\n",
      "country[T.Canada]     0.436040  3.801948  1.287557  6.473065e-01    1.000000e+00\n",
      "country[T.Japan]      0.454915  3.362677  1.236823  6.770417e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.257993  6.264879  1.271335  7.679789e-01    1.000000e+00\n",
      "country[T.Poland]     0.101450  7.251352  0.857699  8.879190e-01    1.000000e+00\n",
      "country[T.Romania]    0.054693  1.042431  0.238775  5.681913e-02    1.000000e+00\n",
      "country[T.Russia]     0.125471  0.799366  0.316697  1.493251e-02    1.000000e+00\n",
      "country[T.Serbia]     0.204202  1.863846  0.616929  3.918806e-01    1.000000e+00\n",
      "country[T.UK]         0.226831  1.218410  0.525712  1.337878e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.832678  2.483781  1.438120  1.925064e-01    1.000000e+00\n",
      "stage                 0.720715  1.145160  0.908479  4.164869e-01    1.000000e+00\n",
      "age_group             1.372219  2.263780  1.762499  9.091980e-06    2.536662e-03\n",
      "Running logistic regression with parameter stage, signature SBS12\n",
      "** Warning: Covariate country, sig SBS12, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter stage, sig SBS12, perfect or near-perfect separation for category 1. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS12: 821\n",
      "All counts for signature SBS12: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.534264    3.350392    1.311729  5.222624e-01    1.000000e+00\n",
      "country[T.Brazil]      0.477709   13.091351    2.807130  2.144215e-01    1.000000e+00\n",
      "country[T.Canada]      1.243392   28.890082    6.176945  2.730388e-02    1.000000e+00\n",
      "country[T.Japan]      43.371131  494.141614  131.637437  8.191873e-24    2.285532e-21\n",
      "country[T.Lithuania]   0.013856   19.299421    1.877290  6.309558e-01    1.000000e+00\n",
      "country[T.Poland]      0.018808   27.181358    2.563716  5.125309e-01    1.000000e+00\n",
      "country[T.Romania]     0.345522    9.367563    2.022688  3.699155e-01    1.000000e+00\n",
      "country[T.Russia]      0.117085    3.211097    0.688426  5.871136e-01    1.000000e+00\n",
      "country[T.Serbia]      0.130892    7.385723    1.326928  6.893591e-01    1.000000e+00\n",
      "country[T.UK]          0.218644    5.865059    1.273695  6.754595e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.250242    1.510197    0.626560  2.838173e-01    1.000000e+00\n",
      "stage                  0.640301    1.366779    0.938162  6.602249e-01    1.000000e+00\n",
      "age_group              0.845855    1.733129    1.204159  2.927212e-01    1.000000e+00\n",
      "Intercept              0.003021    0.054792    0.014276  7.244085e-13    2.021100e-10\n",
      "Running logistic regression with parameter stage, signature SBS13\n",
      "** Warning: Covariate country, sig SBS13, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS13: 716\n",
      "All counts for signature SBS13: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.006452  2.343841  1.526324  4.638112e-02    1.000000e+00\n",
      "country[T.Brazil]     0.528609  2.583060  1.207168  6.320136e-01    1.000000e+00\n",
      "country[T.Canada]     0.524891  2.646047  1.205787  6.495842e-01    1.000000e+00\n",
      "country[T.Japan]      0.130496  1.495948  0.511303  2.366244e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.051078  2.330314  0.496417  4.090473e-01    1.000000e+00\n",
      "country[T.Poland]     0.000980  1.023597  0.127121  5.301705e-02    1.000000e+00\n",
      "country[T.Romania]    0.045035  0.704297  0.225080  7.529470e-03    1.000000e+00\n",
      "country[T.Russia]     0.827878  2.231026  1.357137  2.249267e-01    1.000000e+00\n",
      "country[T.Serbia]     0.278560  1.449829  0.667580  3.161909e-01    1.000000e+00\n",
      "country[T.UK]         0.627756  2.036191  1.140656  6.595440e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.772031  1.699220  1.144109  4.923735e-01    1.000000e+00\n",
      "stage                 1.509862  2.110156  1.780476  2.593624e-12    7.236212e-10\n",
      "age_group             0.851656  1.203322  1.011783  8.812263e-01    1.000000e+00\n",
      "Intercept             0.034612  0.129146  0.068049  1.030451e-18    2.874957e-16\n",
      "Running logistic regression with parameter stage, signature SBS18\n",
      "Zero counts for signature SBS18: 795\n",
      "All counts for signature SBS18: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             463.3042  \n",
      "Link Function:         Logit             BIC:             -5304.2879\n",
      "Dependent Variable:    SBS18_bool        Log-Likelihood:  -217.65   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -238.03   \n",
      "No. Observations:      863               Deviance:        435.30    \n",
      "Df Model:              13                Pearson chi2:    849.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -2.5852   0.4272 -6.0514 0.0000 -3.4225 -1.7479\n",
      "sex[T.Male]           0.3392   0.2952  1.1491 0.2505 -0.2394  0.9178\n",
      "country[T.Brazil]     0.2492   0.5040  0.4945 0.6210 -0.7386  1.2370\n",
      "country[T.Canada]    -0.2150   0.5902 -0.3642 0.7157 -1.3718  0.9418\n",
      "country[T.Japan]     -0.3090   0.7819 -0.3951 0.6927 -1.8415  1.2236\n",
      "country[T.Lithuania] -0.2507   1.0983 -0.2283 0.8194 -2.4034  1.9019\n",
      "country[T.Poland]    -0.5507   1.0779 -0.5109 0.6094 -2.6632  1.5619\n",
      "country[T.Romania]    0.0373   0.5290  0.0705 0.9438 -0.9996  1.0742\n",
      "country[T.Russia]    -0.7464   0.3777 -1.9764 0.0481 -1.4866 -0.0062\n",
      "country[T.Serbia]    -0.3934   0.5279 -0.7451 0.4562 -1.4281  0.6414\n",
      "country[T.UK]        -0.3047   0.4397 -0.6930 0.4883 -1.1664  0.5570\n",
      "tobacco_ever[T.Yes]  -0.4378   0.2737 -1.5997 0.1097 -0.9743  0.0986\n",
      "stage                 0.6276   0.1208  5.1971 0.0000  0.3909  0.8643\n",
      "age_group            -0.2886   0.1184 -2.4373 0.0148 -0.5207 -0.0565\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.032630  0.174139  0.075380  1.435764e-09    4.005781e-07\n",
      "sex[T.Male]           0.787131  2.503873  1.403879  2.504955e-01    1.000000e+00\n",
      "country[T.Brazil]     0.477791  3.445306  1.283018  6.209630e-01    1.000000e+00\n",
      "country[T.Canada]     0.253662  2.564704  0.806578  7.157107e-01    1.000000e+00\n",
      "country[T.Japan]      0.158582  3.399307  0.734213  6.927496e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.090412  6.698760  0.778233  8.194241e-01    1.000000e+00\n",
      "country[T.Poland]     0.069725  4.767862  0.576574  6.094355e-01    1.000000e+00\n",
      "country[T.Romania]    0.368040  2.927554  1.038006  9.437876e-01    1.000000e+00\n",
      "country[T.Russia]     0.226136  0.993805  0.474062  4.810754e-02    1.000000e+00\n",
      "country[T.Serbia]     0.239759  1.899091  0.674777  4.562078e-01    1.000000e+00\n",
      "country[T.UK]         0.311490  1.745509  0.737366  4.883284e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.377470  1.103643  0.645440  1.096752e-01    1.000000e+00\n",
      "stage                 1.478326  2.373293  1.873099  2.024627e-07    5.648709e-05\n",
      "age_group             0.594089  0.945042  0.749292  1.479766e-02    1.000000e+00\n",
      "Running logistic regression with parameter stage, signature SBS21\n",
      "** Warning: Covariate country, sig SBS21, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter stage, sig SBS21, perfect or near-perfect separation for category 3. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS21: 858\n",
      "All counts for signature SBS21: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%        97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.149582     4.792894   0.783608  5.513464e-01        1.000000\n",
      "country[T.Brazil]     0.025555   878.917786   4.738345  2.932684e-01        1.000000\n",
      "country[T.Canada]     0.025601   928.126503   4.868718  4.452754e-01        1.000000\n",
      "country[T.Japan]      0.043095  1513.442983   8.074225  2.549467e-01        1.000000\n",
      "country[T.Lithuania]  0.087241  3225.439096  16.819743  1.355199e-01        1.000000\n",
      "country[T.Poland]     0.055388  2067.437818  10.723153  1.792561e-01        1.000000\n",
      "country[T.Romania]    1.746490  3119.093948  22.306018  1.239333e-02        1.000000\n",
      "country[T.Russia]     0.138655   399.849612   2.696280  3.443660e-01        1.000000\n",
      "country[T.Serbia]     0.015572   543.558911   2.909584  3.341721e-01        1.000000\n",
      "country[T.UK]         0.868720  1521.691677  10.956291  5.101161e-02        1.000000\n",
      "tobacco_ever[T.Yes]   0.252916     8.193495   1.314884  4.633767e-01        1.000000\n",
      "stage                 0.654803     3.204521   1.413130  2.842934e-01        1.000000\n",
      "age_group             0.311335     1.445713   0.676473  2.492456e-01        1.000000\n",
      "Intercept             0.000020     0.050792   0.003344  1.398688e-07        0.000039\n",
      "Running logistic regression with parameter stage, signature SBS22\n",
      "** Warning: Covariate country, sig SBS22, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS22: 798\n",
      "All counts for signature SBS22: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                            2.5%        97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]             0.255390     1.250443    0.571260  1.467149e-01    1.000000e+00\n",
      "country[T.Brazil]       0.356091    57.772300    4.536059  1.893701e-01    1.000000e+00\n",
      "country[T.Canada]       0.700059   126.300351    9.374544  8.282001e-02    1.000000e+00\n",
      "country[T.Japan]        0.012503    36.610278    1.860908  5.207810e-01    1.000000e+00\n",
      "country[T.Lithuania]    0.029410    92.178771    4.465880  3.516897e-01    1.000000e+00\n",
      "country[T.Poland]       0.064892   217.121389   10.031135  2.226390e-01    1.000000e+00\n",
      "country[T.Romania]    125.773090  5264.033049  549.054304  5.801606e-39    1.618648e-36\n",
      "country[T.Russia]       0.004003    11.337768    0.589965  6.205841e-01    1.000000e+00\n",
      "country[T.Serbia]      19.077596   805.456231   84.346791  7.818120e-13    2.181255e-10\n",
      "country[T.UK]           0.167651    26.573874    2.110834  4.335954e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]     0.522104     2.690555    1.172446  5.706994e-01    1.000000e+00\n",
      "stage                   0.543149     1.164869    0.800565  2.239736e-01    1.000000e+00\n",
      "age_group               1.488048     3.172366    2.126866  1.389936e-05    3.877922e-03\n",
      "Intercept               0.000121     0.008433    0.001409  8.798599e-23    2.454809e-20\n",
      "Running logistic regression with parameter stage, signature SBS44\n",
      "** Warning: Covariate country, sig SBS44, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter stage, sig SBS44, perfect or near-perfect separation for category 3. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS44: 858\n",
      "All counts for signature SBS44: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%        97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.149582     4.792894   0.783608  5.513464e-01        1.000000\n",
      "country[T.Brazil]     0.025555   878.917786   4.738345  2.932684e-01        1.000000\n",
      "country[T.Canada]     0.025601   928.126503   4.868718  4.452754e-01        1.000000\n",
      "country[T.Japan]      0.043095  1513.442983   8.074225  2.549467e-01        1.000000\n",
      "country[T.Lithuania]  0.087241  3225.439096  16.819743  1.355199e-01        1.000000\n",
      "country[T.Poland]     0.055388  2067.437818  10.723153  1.792561e-01        1.000000\n",
      "country[T.Romania]    1.746490  3119.093948  22.306018  1.239333e-02        1.000000\n",
      "country[T.Russia]     0.138655   399.849612   2.696280  3.443660e-01        1.000000\n",
      "country[T.Serbia]     0.015572   543.558911   2.909584  3.341721e-01        1.000000\n",
      "country[T.UK]         0.868720  1521.691677  10.956291  5.101161e-02        1.000000\n",
      "tobacco_ever[T.Yes]   0.252916     8.193495   1.314884  4.633767e-01        1.000000\n",
      "stage                 0.654803     3.204521   1.413130  2.842934e-01        1.000000\n",
      "age_group             0.311335     1.445713   0.676473  2.492456e-01        1.000000\n",
      "Intercept             0.000020     0.050792   0.003344  1.398688e-07        0.000039\n",
      "Running logistic regression with parameter stage, signature SBS1536A\n",
      "Zero counts for signature SBS1536A: 432\n",
      "All counts for signature SBS1536A: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1028.4939 \n",
      "Link Function:         Logit             BIC:             -4739.0982\n",
      "Dependent Variable:    SBS1536A_bool     Log-Likelihood:  -500.25   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -598.19   \n",
      "No. Observations:      863               Deviance:        1000.5    \n",
      "Df Model:              13                Pearson chi2:    857.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.1058   0.2556 -4.3269 0.0000 -1.6067 -0.6049\n",
      "sex[T.Male]           0.7101   0.1677  4.2345 0.0000  0.3814  1.0388\n",
      "country[T.Brazil]    -0.9589   0.3313 -2.8943 0.0038 -1.6082 -0.3095\n",
      "country[T.Canada]    -1.6150   0.4059 -3.9791 0.0001 -2.4105 -0.8195\n",
      "country[T.Japan]     -2.8049   0.4639 -6.0465 0.0000 -3.7140 -1.8957\n",
      "country[T.Lithuania] -0.1436   0.6495 -0.2211 0.8250 -1.4166  1.1294\n",
      "country[T.Poland]    -0.2802   0.6243 -0.4488 0.6536 -1.5037  0.9434\n",
      "country[T.Romania]   -0.9725   0.3137 -3.0998 0.0019 -1.5874 -0.3576\n",
      "country[T.Russia]    -1.1986   0.2142 -5.5969 0.0000 -1.6184 -0.7789\n",
      "country[T.Serbia]    -0.7363   0.3029 -2.4308 0.0151 -1.3301 -0.1426\n",
      "country[T.UK]        -0.9211   0.2521 -3.6533 0.0003 -1.4153 -0.4270\n",
      "tobacco_ever[T.Yes]   0.1100   0.1622  0.6779 0.4978 -0.2080  0.4279\n",
      "stage                -0.0517   0.0694 -0.7458 0.4558 -0.1877  0.0842\n",
      "age_group             0.7429   0.0771  9.6310 0.0000  0.5917  0.8940\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.200558  0.546133  0.330955  1.511991e-05    4.218455e-03\n",
      "sex[T.Male]           1.464376  2.825797  2.034215  2.290852e-05    6.391476e-03\n",
      "country[T.Brazil]     0.200243  0.733777  0.383319  3.799929e-03    1.000000e+00\n",
      "country[T.Canada]     0.089774  0.440656  0.198895  6.918316e-05    1.930210e-02\n",
      "country[T.Japan]      0.024379  0.150217  0.060515  1.480017e-09    4.129246e-07\n",
      "country[T.Lithuania]  0.242537  3.093690  0.866218  8.249958e-01    1.000000e+00\n",
      "country[T.Poland]     0.222313  2.568612  0.755668  6.535911e-01    1.000000e+00\n",
      "country[T.Romania]    0.204455  0.699357  0.378136  1.936763e-03    5.403569e-01\n",
      "country[T.Russia]     0.198219  0.458918  0.301606  2.182768e-08    6.089923e-06\n",
      "country[T.Serbia]     0.264457  0.867083  0.478859  1.506668e-02    1.000000e+00\n",
      "country[T.UK]         0.242853  0.652493  0.398070  2.588542e-04    7.222032e-02\n",
      "tobacco_ever[T.Yes]   0.812238  1.534013  1.116236  4.978330e-01    1.000000e+00\n",
      "stage                 0.828837  1.087887  0.949569  4.557641e-01    1.000000e+00\n",
      "age_group             1.807019  2.444962  2.101926  5.917246e-22    1.650912e-19\n",
      "Running logistic regression with parameter stage, signature SBS1536B\n",
      "Zero counts for signature SBS1536B: 432\n",
      "All counts for signature SBS1536B: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1131.2392 \n",
      "Link Function:         Logit             BIC:             -4636.3528\n",
      "Dependent Variable:    SBS1536B_bool     Log-Likelihood:  -551.62   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -598.19   \n",
      "No. Observations:      863               Deviance:        1103.2    \n",
      "Df Model:              13                Pearson chi2:    864.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.4539   0.2460 -5.9111 0.0000 -1.9359 -0.9718\n",
      "sex[T.Male]          -0.0677   0.1553 -0.4360 0.6629 -0.3720  0.2366\n",
      "country[T.Brazil]    -0.7269   0.3353 -2.1681 0.0302 -1.3841 -0.0698\n",
      "country[T.Canada]    -0.3700   0.3806 -0.9721 0.3310 -1.1159  0.3760\n",
      "country[T.Japan]     -0.3960   0.3793 -1.0439 0.2965 -1.1394  0.3474\n",
      "country[T.Lithuania] -0.3636   0.5936 -0.6126 0.5401 -1.5270  0.7997\n",
      "country[T.Poland]    -0.4022   0.6172 -0.6518 0.5145 -1.6119  0.8074\n",
      "country[T.Romania]    0.3089   0.2957  1.0445 0.2962 -0.2707  0.8884\n",
      "country[T.Russia]     0.2740   0.2010  1.3628 0.1730 -0.1201  0.6680\n",
      "country[T.Serbia]     0.1559   0.2914  0.5352 0.5925 -0.4152  0.7271\n",
      "country[T.UK]         0.3418   0.2409  1.4189 0.1559 -0.1303  0.8138\n",
      "tobacco_ever[T.Yes]   0.2578   0.1535  1.6790 0.0932 -0.0431  0.5588\n",
      "stage                 0.2347   0.0654  3.5876 0.0003  0.1065  0.3629\n",
      "age_group             0.5253   0.0690  7.6187 0.0000  0.3902  0.6605\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.144290  0.378399  0.233665  3.397361e-09    9.478636e-07\n",
      "sex[T.Male]           0.689371  1.266939  0.934553  6.628545e-01    1.000000e+00\n",
      "country[T.Brazil]     0.250556  0.932608  0.483395  3.015444e-02    1.000000e+00\n",
      "country[T.Canada]     0.327607  1.456407  0.690745  3.309934e-01    1.000000e+00\n",
      "country[T.Japan]      0.320024  1.415447  0.673036  2.965171e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.217187  2.224942  0.695147  5.401241e-01    1.000000e+00\n",
      "country[T.Poland]     0.199515  2.242005  0.668814  5.145499e-01    1.000000e+00\n",
      "country[T.Romania]    0.762850  2.431291  1.361878  2.962419e-01    1.000000e+00\n",
      "country[T.Russia]     0.886859  1.950394  1.315190  1.729601e-01    1.000000e+00\n",
      "country[T.Serbia]     0.660236  2.068977  1.168766  5.925146e-01    1.000000e+00\n",
      "country[T.UK]         0.877827  2.256502  1.407415  1.559159e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.957775  1.748495  1.294088  9.315377e-02    1.000000e+00\n",
      "stage                 1.112363  1.437545  1.264544  3.336821e-04    9.309731e-02\n",
      "age_group             1.477243  1.935690  1.691001  2.563394e-14    7.151868e-12\n",
      "Running logistic regression with parameter stage, signature SBS1536F\n",
      "** Warning: Covariate country, sig SBS1536F, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536F: 764\n",
      "All counts for signature SBS1536F: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.698900   1.735338  1.095816  6.782872e-01        1.000000\n",
      "country[T.Brazil]     0.773992   4.385328  1.915529  1.507720e-01        1.000000\n",
      "country[T.Canada]     1.368519   9.401507  3.726722  1.156750e-02        1.000000\n",
      "country[T.Japan]      0.585848   4.632379  1.773713  2.839855e-01        1.000000\n",
      "country[T.Lithuania]  0.835144  12.243925  3.557665  8.040352e-02        1.000000\n",
      "country[T.Poland]     0.003587   3.807359  0.466370  5.424520e-01        1.000000\n",
      "country[T.Romania]    0.075497   1.228881  0.381883  1.124048e-01        1.000000\n",
      "country[T.Russia]     0.986694   3.232183  1.772016  5.485523e-02        1.000000\n",
      "country[T.Serbia]     0.302640   2.230566  0.890887  7.862221e-01        1.000000\n",
      "country[T.UK]         0.832263   3.364753  1.691225  1.413096e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.474300   1.170798  0.746962  2.024803e-01        1.000000\n",
      "stage                 0.621275   0.931466  0.764374  7.255244e-03        1.000000\n",
      "age_group             0.792232   1.164358  0.960033  6.647903e-01        1.000000\n",
      "Intercept             0.072273   0.297548  0.149691  1.586396e-08        0.000004\n",
      "Running logistic regression with parameter stage, signature SBS1536I\n",
      "** Warning: Covariate country, sig SBS1536I, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536I: 774\n",
      "All counts for signature SBS1536I: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.473790    1.980545    0.967187  5.657526e-01    1.000000e+00\n",
      "country[T.Brazil]      0.123243    6.567688    1.238838  5.367954e-01    1.000000e+00\n",
      "country[T.Canada]      0.003466    4.536396    0.465697  5.612486e-01    1.000000e+00\n",
      "country[T.Japan]       0.142272    7.850544    1.447697  4.874220e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.009544   13.060053    1.294048  5.683438e-01    1.000000e+00\n",
      "country[T.Poland]      0.015915   23.116911    2.180889  4.578855e-01    1.000000e+00\n",
      "country[T.Romania]    70.275006  654.334588  195.704730  1.804799e-38    5.035389e-36\n",
      "country[T.Russia]      0.047407    2.457397    0.471676  3.067854e-01    1.000000e+00\n",
      "country[T.Serbia]     28.266759  247.940003   76.667524  5.910934e-24    1.649151e-21\n",
      "country[T.UK]          0.001271    1.533526    0.168246  1.066774e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.538446    2.295409    1.104428  5.156657e-01    1.000000e+00\n",
      "stage                  0.811269    1.547033    1.120626  3.696889e-01    1.000000e+00\n",
      "age_group              1.574450    3.093042    2.170748  5.635380e-07    1.572271e-04\n",
      "Intercept              0.000598    0.011044    0.002843  1.120658e-26    3.126637e-24\n",
      "Running logistic regression with parameter stage, signature DBS2\n",
      "Zero counts for signature DBS2: 502\n",
      "All counts for signature DBS2: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1132.5082 \n",
      "Link Function:         Logit             BIC:             -4635.0839\n",
      "Dependent Variable:    DBS2_bool         Log-Likelihood:  -552.25   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -586.62   \n",
      "No. Observations:      863               Deviance:        1104.5    \n",
      "Df Model:              13                Pearson chi2:    865.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.4771   0.2455 -6.0170 0.0000 -1.9583 -0.9960\n",
      "sex[T.Male]           0.4012   0.1563  2.5669 0.0103  0.0948  0.7075\n",
      "country[T.Brazil]    -0.2937   0.3231 -0.9089 0.3634 -0.9271  0.3396\n",
      "country[T.Canada]    -0.6216   0.3824 -1.6257 0.1040 -1.3711  0.1278\n",
      "country[T.Japan]     -0.9561   0.3978 -2.4035 0.0162 -1.7357 -0.1764\n",
      "country[T.Lithuania] -0.2567   0.6005 -0.4274 0.6691 -1.4335  0.9202\n",
      "country[T.Poland]    -0.3619   0.6060 -0.5971 0.5504 -1.5497  0.8259\n",
      "country[T.Romania]    0.4576   0.2920  1.5670 0.1171 -0.1148  1.0301\n",
      "country[T.Russia]    -0.2550   0.2030 -1.2564 0.2090 -0.6528  0.1428\n",
      "country[T.Serbia]     0.3475   0.2842  1.2225 0.2215 -0.2096  0.9046\n",
      "country[T.UK]        -0.2298   0.2363 -0.9725 0.3308 -0.6929  0.2333\n",
      "tobacco_ever[T.Yes]   0.7522   0.1532  4.9102 0.0000  0.4520  1.0525\n",
      "stage                 0.0920   0.0649  1.4174 0.1564 -0.0352  0.2192\n",
      "age_group             0.2655   0.0668  3.9771 0.0001  0.1347  0.3963\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.141097  0.369361  0.228289  1.776978e-09    4.957768e-07\n",
      "sex[T.Male]           1.099493  2.028842  1.493552  1.026197e-02    1.000000e+00\n",
      "country[T.Brazil]     0.395712  1.404444  0.745490  3.633936e-01    1.000000e+00\n",
      "country[T.Canada]     0.253834  1.136337  0.537067  1.040109e-01    1.000000e+00\n",
      "country[T.Japan]      0.176274  0.838257  0.384400  1.623900e-02    1.000000e+00\n",
      "country[T.Lithuania]  0.238462  2.509825  0.773627  6.690524e-01    1.000000e+00\n",
      "country[T.Poland]     0.212310  2.283985  0.696357  5.504109e-01    1.000000e+00\n",
      "country[T.Romania]    0.891586  2.801208  1.580354  1.171059e-01    1.000000e+00\n",
      "country[T.Russia]     0.520593  1.153485  0.774917  2.089623e-01    1.000000e+00\n",
      "country[T.Serbia]     0.810900  2.471011  1.415536  2.215015e-01    1.000000e+00\n",
      "country[T.UK]         0.500141  1.262785  0.794714  3.308127e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   1.571416  2.864828  2.121753  9.097540e-07    2.538214e-04\n",
      "stage                 0.965392  1.245131  1.096376  1.563664e-01    1.000000e+00\n",
      "age_group             1.144146  1.486371  1.304081  6.974850e-05    1.945983e-02\n",
      "Running logistic regression with parameter stage, signature DBS4\n",
      "Zero counts for signature DBS4: 785\n",
      "All counts for signature DBS4: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             529.4059  \n",
      "Link Function:         Logit             BIC:             -5238.1862\n",
      "Dependent Variable:    DBS4_bool         Log-Likelihood:  -250.70   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -261.85   \n",
      "No. Observations:      863               Deviance:        501.41    \n",
      "Df Model:              13                Pearson chi2:    864.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -2.5558   0.4067 -6.2844 0.0000 -3.3529 -1.7587\n",
      "sex[T.Male]          -0.1612   0.2611 -0.6176 0.5369 -0.6730  0.3505\n",
      "country[T.Brazil]    -0.9029   0.6286 -1.4364 0.1509 -2.1350  0.3291\n",
      "country[T.Canada]    -0.3078   0.5801 -0.5307 0.5956 -1.4447  0.8291\n",
      "country[T.Japan]     -0.9338   0.7586 -1.2309 0.2183 -2.4206  0.5530\n",
      "country[T.Lithuania]  1.0173   0.6386  1.5930 0.1112 -0.2343  2.2689\n",
      "country[T.Poland]    -0.5046   1.0687 -0.4722 0.6368 -2.5992  1.5900\n",
      "country[T.Romania]   -0.7568   0.5534 -1.3676 0.1714 -1.8415  0.3278\n",
      "country[T.Russia]    -0.2951   0.3204 -0.9210 0.3571 -0.9231  0.3329\n",
      "country[T.Serbia]    -1.4980   0.7470 -2.0055 0.0449 -2.9620 -0.0340\n",
      "country[T.UK]        -0.7128   0.4188 -1.7019 0.0888 -1.5337  0.1081\n",
      "tobacco_ever[T.Yes]   0.0440   0.2611  0.1686 0.8661 -0.4678  0.5558\n",
      "stage                 0.1320   0.1075  1.2276 0.2196 -0.0788  0.3428\n",
      "age_group             0.2465   0.1140  2.1618 0.0306  0.0230  0.4699\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.034982  0.172267  0.077629  3.292150e-10    9.185098e-08\n",
      "sex[T.Male]           0.510197  1.419758  0.851091  5.368686e-01    1.000000e+00\n",
      "country[T.Brazil]     0.118240  1.389783  0.405374  1.508975e-01    1.000000e+00\n",
      "country[T.Canada]     0.235809  2.291141  0.735031  5.956199e-01    1.000000e+00\n",
      "country[T.Japan]      0.088866  1.738544  0.393062  2.183488e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.791099  9.668458  2.765630  1.111583e-01    1.000000e+00\n",
      "country[T.Poland]     0.074335  4.903775  0.603755  6.368165e-01    1.000000e+00\n",
      "country[T.Romania]    0.158583  1.387901  0.469146  1.714268e-01    1.000000e+00\n",
      "country[T.Russia]     0.397303  1.395005  0.744473  3.570692e-01    1.000000e+00\n",
      "country[T.Serbia]     0.051714  0.966557  0.223572  4.490932e-02    1.000000e+00\n",
      "country[T.UK]         0.215735  1.114161  0.490268  8.878099e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.626397  1.743373  1.045009  8.661119e-01    1.000000e+00\n",
      "stage                 0.924260  1.408882  1.141128  2.196044e-01    1.000000e+00\n",
      "age_group             1.023280  1.599827  1.279480  3.063157e-02    1.000000e+00\n",
      "Running logistic regression with parameter stage, signature DBS9\n",
      "** Warning: Covariate country, sig DBS9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS9: 831\n",
      "All counts for signature DBS9: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.644638   3.051344  1.370259  4.003012e-01        1.000000\n",
      "country[T.Brazil]     0.204059   3.855651  1.080524  7.660031e-01        1.000000\n",
      "country[T.Canada]     0.818579  13.078912  3.596584  8.498626e-02        1.000000\n",
      "country[T.Japan]      0.001852   1.966667  0.240872  2.161388e-01        1.000000\n",
      "country[T.Lithuania]  0.281581  13.891603  2.790250  2.999943e-01        1.000000\n",
      "country[T.Poland]     0.008698  10.269507  1.149784  7.544485e-01        1.000000\n",
      "country[T.Romania]    0.001183   1.202123  0.152713  7.858185e-02        1.000000\n",
      "country[T.Russia]     0.487463   2.984782  1.210061  6.307050e-01        1.000000\n",
      "country[T.Serbia]     0.306277   3.976701  1.246137  6.681358e-01        1.000000\n",
      "country[T.UK]         0.093643   1.706269  0.489609  2.666967e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.445154   1.994565  0.944013  8.142000e-01        1.000000\n",
      "stage                 0.435721   0.910645  0.643877  1.132018e-02        1.000000\n",
      "age_group             0.779779   1.495544  1.076846  6.188263e-01        1.000000\n",
      "Intercept             0.014654   0.152641  0.050407  7.442822e-09        0.000002\n",
      "Running logistic regression with parameter stage, signature DBS78C\n",
      "** Warning: Covariate country, sig DBS78C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78C: 778\n",
      "All counts for signature DBS78C: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.195048  3.445776  1.997449  7.757985e-03    1.000000e+00\n",
      "country[T.Brazil]     0.687742  4.179490  1.779626  2.184422e-01    1.000000e+00\n",
      "country[T.Canada]     1.559600  9.660497  3.967679  4.596601e-03    1.000000e+00\n",
      "country[T.Japan]      0.104319  1.811964  0.540030  3.425392e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.141096  6.112643  1.346544  7.330566e-01    1.000000e+00\n",
      "country[T.Poland]     0.003626  3.882984  0.472079  5.522423e-01    1.000000e+00\n",
      "country[T.Romania]    0.290261  2.139018  0.855079  7.363149e-01    1.000000e+00\n",
      "country[T.Russia]     0.549588  2.119719  1.086695  7.954464e-01    1.000000e+00\n",
      "country[T.Serbia]     0.446992  2.933466  1.218993  6.691372e-01    1.000000e+00\n",
      "country[T.UK]         0.749034  3.016685  1.520914  2.388876e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.694319  1.807274  1.117265  6.434930e-01    1.000000e+00\n",
      "stage                 0.635660  0.978891  0.792730  3.024812e-02    1.000000e+00\n",
      "age_group             1.011052  1.542380  1.245672  3.885288e-02    1.000000e+00\n",
      "Intercept             0.019873  0.101736  0.046297  3.719549e-17    1.037754e-14\n",
      "Running logistic regression with parameter stage, signature DBS78D\n",
      "** Warning: Covariate country, sig DBS78D, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78D: 808\n",
      "All counts for signature DBS78D: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.406891    1.547872   0.793363  3.784735e-01    1.000000e+00\n",
      "country[T.Brazil]      0.002965    3.591173   0.392596  3.622750e-01    1.000000e+00\n",
      "country[T.Canada]      0.003639    4.635573   0.486383  5.862587e-01    1.000000e+00\n",
      "country[T.Japan]       0.169413    9.096400   1.706840  4.300884e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.405732   24.371231   4.247324  1.555924e-01    1.000000e+00\n",
      "country[T.Poland]      0.598873   37.820877   6.369788  8.875973e-02    1.000000e+00\n",
      "country[T.Romania]    16.829905  127.176565  42.449141  1.592556e-19    4.443232e-17\n",
      "country[T.Russia]      0.302812    4.253996   1.163612  5.468658e-01    1.000000e+00\n",
      "country[T.Serbia]      3.696390   33.433973  10.495066  8.308576e-06    2.318093e-03\n",
      "country[T.UK]          0.158537    3.854464   0.901729  5.665637e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.492236    1.965093   0.983079  5.599000e-01    1.000000e+00\n",
      "stage                  0.856818    1.570891   1.160623  2.654870e-01    1.000000e+00\n",
      "age_group              1.213801    2.225331   1.627404  8.215155e-04    2.292028e-01\n",
      "Intercept              0.001775    0.024708   0.007249  6.737650e-22    1.879804e-19\n",
      "Running logistic regression with parameter stage, signature ID1\n",
      "Zero counts for signature ID1: 432\n",
      "All counts for signature ID1: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1128.5198 \n",
      "Link Function:         Logit             BIC:             -4639.0723\n",
      "Dependent Variable:    ID1_bool          Log-Likelihood:  -550.26   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -598.19   \n",
      "No. Observations:      863               Deviance:        1100.5    \n",
      "Df Model:              13                Pearson chi2:    861.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.8041   0.2375 -3.3851 0.0007 -1.2696 -0.3385\n",
      "sex[T.Male]           0.1729   0.1561  1.1075 0.2681 -0.1331  0.4788\n",
      "country[T.Brazil]     0.5616   0.3247  1.7293 0.0838 -0.0749  1.1981\n",
      "country[T.Canada]     0.4152   0.4042  1.0273 0.3043 -0.3769  1.2073\n",
      "country[T.Japan]      0.2250   0.3763  0.5978 0.5500 -0.5126  0.9625\n",
      "country[T.Lithuania] -1.4666   0.6876 -2.1327 0.0329 -2.8143 -0.1188\n",
      "country[T.Poland]    -1.8314   0.7900 -2.3183 0.0204 -3.3797 -0.2831\n",
      "country[T.Romania]   -0.2958   0.2945 -1.0043 0.3152 -0.8730  0.2815\n",
      "country[T.Russia]     0.2079   0.1999  1.0397 0.2985 -0.1840  0.5997\n",
      "country[T.Serbia]    -0.4142   0.2937 -1.4100 0.1585 -0.9899  0.1615\n",
      "country[T.UK]        -0.2052   0.2375 -0.8641 0.3875 -0.6706  0.2602\n",
      "tobacco_ever[T.Yes]  -0.4353   0.1540 -2.8269 0.0047 -0.7372 -0.1335\n",
      "stage                 0.4366   0.0665  6.5698 0.0000  0.3063  0.5668\n",
      "age_group             0.2238   0.0658  3.3997 0.0007  0.0948  0.3529\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.280947  0.712829  0.447512  7.114170e-04    1.984854e-01\n",
      "sex[T.Male]           0.875407  1.614144  1.188711  2.680869e-01    1.000000e+00\n",
      "country[T.Brazil]     0.927834  3.313724  1.753450  8.375273e-02    1.000000e+00\n",
      "country[T.Canada]     0.685953  3.344444  1.514640  3.042881e-01    1.000000e+00\n",
      "country[T.Japan]      0.598929  2.618314  1.252272  5.499807e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.059945  0.887979  0.230716  3.294624e-02    1.000000e+00\n",
      "country[T.Poland]     0.034057  0.753476  0.160192  2.043431e-02    1.000000e+00\n",
      "country[T.Romania]    0.417678  1.325086  0.743949  3.152495e-01    1.000000e+00\n",
      "country[T.Russia]     0.831935  1.821659  1.231057  2.984875e-01    1.000000e+00\n",
      "country[T.Serbia]     0.371626  1.175317  0.660892  1.585358e-01    1.000000e+00\n",
      "country[T.UK]         0.511385  1.297228  0.814483  3.875236e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.478463  0.875024  0.647045  4.700501e-03    1.000000e+00\n",
      "stage                 1.358440  1.762662  1.547408  5.036714e-11    1.405243e-08\n",
      "age_group             1.099426  1.423141  1.250855  6.745994e-04    1.882132e-01\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running logistic regression with parameter stage, signature ID2\n",
      "** Warning: Covariate country, sig ID2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID2: 847\n",
      "All counts for signature ID2: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.323212   2.633830  0.892777  5.580948e-01    1.000000e+00\n",
      "country[T.Brazil]     0.003060   3.670736  0.404334  3.620873e-01    1.000000e+00\n",
      "country[T.Canada]     0.003176   4.005062  0.423619  5.114434e-01    1.000000e+00\n",
      "country[T.Japan]      0.682848  18.065177  4.004579  9.326782e-02    1.000000e+00\n",
      "country[T.Lithuania]  0.011022  14.580697  1.482465  5.171242e-01    1.000000e+00\n",
      "country[T.Poland]     0.008909  11.968910  1.203490  5.343768e-01    1.000000e+00\n",
      "country[T.Romania]    0.357290   8.959397  2.052604  2.954233e-01    1.000000e+00\n",
      "country[T.Russia]     0.192902   2.740694  0.745480  4.664284e-01    1.000000e+00\n",
      "country[T.Serbia]     0.002132   2.566185  0.281998  2.408292e-01    1.000000e+00\n",
      "country[T.UK]         0.314953   5.354791  1.388655  4.646796e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.254352   2.011082  0.722625  3.857186e-01    1.000000e+00\n",
      "stage                 1.095640   2.599341  1.664271  1.406359e-02    1.000000e+00\n",
      "age_group             0.533946   1.293427  0.828918  3.203410e-01    1.000000e+00\n",
      "Intercept             0.004330   0.094724  0.022924  2.399731e-09    6.695250e-07\n",
      "Running logistic regression with parameter stage, signature ID3\n",
      "** Warning: Covariate country, sig ID3, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID3: 825\n",
      "All counts for signature ID3: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.494937   2.037347  0.989812  7.268351e-01    1.000000e+00\n",
      "country[T.Brazil]     0.233411   4.626484  1.253693  6.186608e-01    1.000000e+00\n",
      "country[T.Canada]     0.135125   6.479365  1.326829  7.536990e-01    1.000000e+00\n",
      "country[T.Japan]      0.274088   5.695620  1.500454  5.149955e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.007653   8.932242  1.010245  6.814298e-01    1.000000e+00\n",
      "country[T.Poland]     0.368357  19.197922  3.706303  2.007941e-01    1.000000e+00\n",
      "country[T.Romania]    2.143412  13.598200  5.335051  3.763131e-04    1.049914e-01\n",
      "country[T.Russia]     0.317365   2.640268  0.941953  7.132277e-01    1.000000e+00\n",
      "country[T.Serbia]     0.066806   2.857094  0.633777  5.053559e-01    1.000000e+00\n",
      "country[T.UK]         0.387348   3.527640  1.226198  6.014675e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.766972   3.148831  1.535159  2.105207e-01    1.000000e+00\n",
      "stage                 0.538739   1.052085  0.764220  9.351801e-02    1.000000e+00\n",
      "age_group             0.855806   1.557421  1.149404  3.268930e-01    1.000000e+00\n",
      "Intercept             0.009441   0.088098  0.030610  3.370288e-13    9.403104e-11\n",
      "Running logistic regression with parameter stage, signature ID5\n",
      "Zero counts for signature ID5: 432\n",
      "All counts for signature ID5: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1016.0086 \n",
      "Link Function:         Logit             BIC:             -4751.5834\n",
      "Dependent Variable:    ID5_bool          Log-Likelihood:  -494.00   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -598.19   \n",
      "No. Observations:      863               Deviance:        988.01    \n",
      "Df Model:              13                Pearson chi2:    866.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.3847   0.2610 -5.3054 0.0000 -1.8963 -0.8732\n",
      "sex[T.Male]           0.5262   0.1687  3.1186 0.0018  0.1955  0.8569\n",
      "country[T.Brazil]    -0.8283   0.3364 -2.4624 0.0138 -1.4876 -0.1690\n",
      "country[T.Canada]    -0.8046   0.4027 -1.9982 0.0457 -1.5939 -0.0154\n",
      "country[T.Japan]     -2.9150   0.5116 -5.6982 0.0000 -3.9176 -1.9123\n",
      "country[T.Lithuania] -0.1528   0.6371 -0.2398 0.8105 -1.4014  1.0959\n",
      "country[T.Poland]    -0.7804   0.6767 -1.1532 0.2488 -2.1068  0.5459\n",
      "country[T.Romania]    0.3551   0.3321  1.0692 0.2850 -0.2958  1.0059\n",
      "country[T.Russia]    -0.4178   0.2125 -1.9660 0.0493 -0.8343 -0.0013\n",
      "country[T.Serbia]    -0.1085   0.3074 -0.3529 0.7242 -0.7109  0.4940\n",
      "country[T.UK]        -0.3864   0.2516 -1.5357 0.1246 -0.8796  0.1068\n",
      "tobacco_ever[T.Yes]  -0.0405   0.1635 -0.2479 0.8042 -0.3610  0.2800\n",
      "stage                -0.2754   0.0709 -3.8865 0.0001 -0.4144 -0.1365\n",
      "age_group             0.8755   0.0810 10.8101 0.0000  0.7167  1.0342\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.150127  0.417629  0.250395  1.124390e-07    3.137049e-05\n",
      "sex[T.Male]           1.215930  2.355934  1.692528  1.816846e-03    5.069000e-01\n",
      "country[T.Brazil]     0.225915  0.844492  0.436788  1.379975e-02    1.000000e+00\n",
      "country[T.Canada]     0.203131  0.984731  0.447246  4.569774e-02    1.000000e+00\n",
      "country[T.Japan]      0.019888  0.147736  0.054205  1.211045e-08    3.378815e-06\n",
      "country[T.Lithuania]  0.246249  2.991739  0.858320  8.104745e-01    1.000000e+00\n",
      "country[T.Poland]     0.121631  1.726230  0.458217  2.488189e-01    1.000000e+00\n",
      "country[T.Romania]    0.743938  2.734407  1.426264  2.849747e-01    1.000000e+00\n",
      "country[T.Russia]     0.434170  0.998714  0.658492  4.929641e-02    1.000000e+00\n",
      "country[T.Serbia]     0.491204  1.638803  0.897211  7.241774e-01    1.000000e+00\n",
      "country[T.UK]         0.414949  1.112676  0.679488  1.246224e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.696952  1.323099  0.960280  8.042482e-01    1.000000e+00\n",
      "stage                 0.660763  0.872373  0.759231  1.017177e-04    2.837923e-02\n",
      "age_group             2.047759  2.812881  2.400021  3.083518e-27    8.603016e-25\n",
      "Running logistic regression with parameter stage, signature ID8\n",
      "Zero counts for signature ID8: 229\n",
      "All counts for signature ID8: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             937.9227  \n",
      "Link Function:         Logit             BIC:             -4829.6694\n",
      "Dependent Variable:    ID8_bool          Log-Likelihood:  -454.96   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -499.32   \n",
      "No. Observations:      863               Deviance:        909.92    \n",
      "Df Model:              13                Pearson chi2:    857.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept             0.1476   0.2611  0.5656 0.5717 -0.3640  0.6593\n",
      "sex[T.Male]           0.4644   0.1743  2.6647 0.0077  0.1228  0.8060\n",
      "country[T.Brazil]    -0.7161   0.3448 -2.0768 0.0378 -1.3920 -0.0403\n",
      "country[T.Canada]    -0.2983   0.4388 -0.6798 0.4966 -1.1583  0.5617\n",
      "country[T.Japan]     -1.5548   0.4015 -3.8728 0.0001 -2.3417 -0.7680\n",
      "country[T.Lithuania]  1.0397   1.0744  0.9677 0.3332 -1.0661  3.1455\n",
      "country[T.Poland]    -0.1626   0.6525 -0.2492 0.8032 -1.4415  1.1162\n",
      "country[T.Romania]   -0.6974   0.3299 -2.1141 0.0345 -1.3440 -0.0509\n",
      "country[T.Russia]    -0.0685   0.2329 -0.2941 0.7687 -0.5250  0.3880\n",
      "country[T.Serbia]    -0.6649   0.3108 -2.1395 0.0324 -1.2740 -0.0558\n",
      "country[T.UK]        -0.2203   0.2848 -0.7736 0.4391 -0.7785  0.3378\n",
      "tobacco_ever[T.Yes]  -0.0242   0.1720 -0.1404 0.8883 -0.3613  0.3130\n",
      "stage                -0.1503   0.0734 -2.0477 0.0406 -0.2942 -0.0064\n",
      "age_group             0.5823   0.0776  7.5039 0.0000  0.4302  0.7344\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.694872   1.933475  1.159102  5.716946e-01    1.000000e+00\n",
      "sex[T.Male]           1.130686   2.239033  1.591114  7.706568e-03    1.000000e+00\n",
      "country[T.Brazil]     0.248581   0.960506  0.488634  3.781812e-02    1.000000e+00\n",
      "country[T.Canada]     0.314021   1.753665  0.742084  4.966200e-01    1.000000e+00\n",
      "country[T.Japan]      0.096161   0.463956  0.211221  1.075869e-04    3.001674e-02\n",
      "country[T.Lithuania]  0.344336  23.231043  2.828299  3.332106e-01    1.000000e+00\n",
      "country[T.Poland]     0.236584   3.053279  0.849916  8.031827e-01    1.000000e+00\n",
      "country[T.Romania]    0.260801   0.950413  0.497864  3.450398e-02    1.000000e+00\n",
      "country[T.Russia]     0.591552   1.474025  0.933789  7.686652e-01    1.000000e+00\n",
      "country[T.Serbia]     0.279714   0.945742  0.514332  3.239756e-02    1.000000e+00\n",
      "country[T.UK]         0.459101   1.401918  0.802261  4.391382e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.696775   1.367504  0.976137  8.883366e-01    1.000000e+00\n",
      "stage                 0.745098   0.993579  0.860415  4.058860e-02    1.000000e+00\n",
      "age_group             1.537591   2.084253  1.790176  6.195870e-14    1.728648e-11\n",
      "Running logistic regression with parameter stage, signature ID9\n",
      "** Warning: Covariate country, sig ID9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter stage, sig ID9, perfect or near-perfect separation for category 1. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID9: 859\n",
      "All counts for signature ID9: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.151676  11.202688  1.027692  6.804641e-01        1.000000\n",
      "country[T.Brazil]     0.006312  11.302532  0.883480  5.619918e-01        1.000000\n",
      "country[T.Canada]     0.198308  22.152460  2.423433  4.173526e-01        1.000000\n",
      "country[T.Japan]      0.008272  15.849331  1.180466  6.259947e-01        1.000000\n",
      "country[T.Lithuania]  0.022795  44.769126  3.243961  3.892435e-01        1.000000\n",
      "country[T.Poland]     0.018536  39.663247  2.712866  4.528851e-01        1.000000\n",
      "country[T.Romania]    0.006130  10.916358  0.853586  6.501989e-01        1.000000\n",
      "country[T.Russia]     0.001862   3.625854  0.265003  2.987739e-01        1.000000\n",
      "country[T.Serbia]     0.205825  19.505793  2.379527  3.394249e-01        1.000000\n",
      "country[T.UK]         0.002532   4.575960  0.355600  3.966738e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.296924  23.461417  2.012880  3.761377e-01        1.000000\n",
      "stage                 0.585929   3.256759  1.346179  3.944237e-01        1.000000\n",
      "age_group             0.522150   3.165936  1.205227  5.847721e-01        1.000000\n",
      "Intercept             0.000081   0.057683  0.003821  6.574535e-07        0.000183\n",
      "Running logistic regression with parameter stage, signature ID11\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "** Warning: Covariate country, sig ID11, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter stage, sig ID11, perfect or near-perfect separation for category 1. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID11: 857\n",
      "All counts for signature ID11: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]           0.191558   4.729690  0.950425  0.530774        1.000000\n",
      "country[T.Brazil]     0.006704  11.791734  0.931964  0.545330        1.000000\n",
      "country[T.Canada]     0.009761  20.810066  1.411733  0.694231        1.000000\n",
      "country[T.Japan]      0.011676  21.357137  1.640011  0.468543        1.000000\n",
      "country[T.Lithuania]  0.020070  38.238689  2.848259  0.399388        1.000000\n",
      "country[T.Poland]     0.032714  73.314983  4.771159  0.274887        1.000000\n",
      "country[T.Romania]    0.004955   8.602304  0.687302  0.472443        1.000000\n",
      "country[T.Russia]     0.264727   9.589163  1.474045  0.441228        1.000000\n",
      "country[T.Serbia]     0.005123   9.064595  0.713541  0.522008        1.000000\n",
      "country[T.UK]         0.133413  11.399213  1.481335  0.461072        1.000000\n",
      "tobacco_ever[T.Yes]   0.032326   1.783787  0.321032  0.152498        1.000000\n",
      "stage                 0.540928   2.103409  1.091459  0.475819        1.000000\n",
      "age_group             0.579266   2.455537  1.161120  0.476152        1.000000\n",
      "Intercept             0.000719   0.106768  0.012274  0.000003        0.000873\n",
      "Running logistic regression with parameter stage, signature ID12\n",
      "** Warning: Covariate country, sig ID12, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID12: 858\n",
      "All counts for signature ID12: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%        97.5%         OR   p-value  p-value (corr)\n",
      "sex[T.Male]           0.075317     2.496044   0.464106  0.295183        1.000000\n",
      "country[T.Brazil]     0.671800  1888.614433  12.845567  0.064451        1.000000\n",
      "country[T.Canada]     0.034487  1270.862632   6.609770  0.371401        1.000000\n",
      "country[T.Japan]      0.044489  1559.721922   8.327316  0.222056        1.000000\n",
      "country[T.Lithuania]  0.100406  3596.604928  18.974592  0.117019        1.000000\n",
      "country[T.Poland]     0.055343  2119.015234  10.843662  0.176251        1.000000\n",
      "country[T.Romania]    0.019515   680.350135   3.644841  0.307532        1.000000\n",
      "country[T.Russia]     0.509790   752.113921   5.514077  0.134766        1.000000\n",
      "country[T.Serbia]     0.013692   480.830332   2.566669  0.327231        1.000000\n",
      "country[T.UK]         0.354653   993.495820   6.765651  0.141834        1.000000\n",
      "tobacco_ever[T.Yes]   0.156924     5.260792   0.960620  0.532842        1.000000\n",
      "stage                 0.545198     2.387360   1.157358  0.493381        1.000000\n",
      "age_group             0.251007     1.256545   0.582592  0.138528        1.000000\n",
      "Intercept             0.000058     0.114744   0.008890  0.000010        0.002787\n",
      "Running logistic regression with parameter stage, signature ID83C\n",
      "** Warning: Covariate country, sig ID83C, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID83C: 847\n",
      "All counts for signature ID83C: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%         97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.261860      2.828264    0.855088  5.206122e-01    1.000000e+00\n",
      "country[T.Brazil]      0.022174    774.723987    4.145777  3.191666e-01    1.000000e+00\n",
      "country[T.Canada]      0.020415    739.121622    3.883499  4.831980e-01    1.000000e+00\n",
      "country[T.Japan]       0.030302   1070.500979    5.696741  2.718766e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.072025   2612.344810   13.724981  1.727105e-01    1.000000e+00\n",
      "country[T.Poland]      0.095992   3624.747595   18.662945  1.439441e-01    1.000000e+00\n",
      "country[T.Romania]    18.189862  18883.276426  145.468501  1.193443e-10    3.329705e-08\n",
      "country[T.Russia]      0.008116    282.794362    1.514845  5.211805e-01    1.000000e+00\n",
      "country[T.Serbia]      2.844230   4154.894760   30.557719  2.684384e-03    7.489433e-01\n",
      "country[T.UK]          0.009365    327.370836    1.751664  4.849038e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.341878      3.911749    1.158382  4.796645e-01    1.000000e+00\n",
      "stage                  0.860102      2.625577    1.482987  1.255856e-01    1.000000e+00\n",
      "age_group              1.103434      3.545584    1.887584  1.619532e-02    1.000000e+00\n",
      "Intercept              0.000002      0.004803    0.000298  3.299360e-18    9.205215e-16\n",
      "Running logistic regression with parameter stage, signature SBS_burden\n",
      "Zero counts for signature SBS_burden: 416\n",
      "All counts for signature SBS_burden: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             946.3007  \n",
      "Link Function:         Logit             BIC:             -4821.2914\n",
      "Dependent Variable:    SBS_burden_bool   Log-Likelihood:  -459.15   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -597.63   \n",
      "No. Observations:      863               Deviance:        918.30    \n",
      "Df Model:              13                Pearson chi2:    871.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -2.7110   0.2938 -9.2286 0.0000 -3.2868 -2.1353\n",
      "sex[T.Male]           0.6619   0.1770  3.7388 0.0002  0.3149  1.0089\n",
      "country[T.Brazil]    -0.2672   0.3533 -0.7564 0.4494 -0.9596  0.4252\n",
      "country[T.Canada]    -1.3350   0.4265 -3.1305 0.0017 -2.1708 -0.4992\n",
      "country[T.Japan]     -0.7642   0.4237 -1.8038 0.0713 -1.5945  0.0662\n",
      "country[T.Lithuania] -0.3945   0.6354 -0.6208 0.5347 -1.6399  0.8509\n",
      "country[T.Poland]    -0.4896   0.6730 -0.7275 0.4669 -1.8085  0.8294\n",
      "country[T.Romania]    2.2658   0.4533  4.9991 0.0000  1.3775  3.1542\n",
      "country[T.Russia]    -0.3175   0.2200 -1.4429 0.1491 -0.7487  0.1138\n",
      "country[T.Serbia]     0.4325   0.3234  1.3371 0.1812 -0.2015  1.0664\n",
      "country[T.UK]        -0.2681   0.2622 -1.0226 0.3065 -0.7820  0.2458\n",
      "tobacco_ever[T.Yes]   0.1642   0.1715  0.9576 0.3383 -0.1719  0.5003\n",
      "stage                 0.2801   0.0733  3.8242 0.0001  0.1366  0.4237\n",
      "age_group             1.0466   0.0871 12.0212 0.0000  0.8760  1.2173\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.037374   0.118215  0.066469  2.741031e-20    7.647477e-18\n",
      "sex[T.Male]           1.370142   2.742551  1.938475  1.849253e-04    5.159415e-02\n",
      "country[T.Brazil]     0.383031   1.529902  0.765506  4.494182e-01    1.000000e+00\n",
      "country[T.Canada]     0.114084   0.607038  0.263160  1.745273e-03    4.869312e-01\n",
      "country[T.Japan]      0.203002   1.068414  0.465714  7.126832e-02    1.000000e+00\n",
      "country[T.Lithuania]  0.193998   2.341797  0.674021  5.347089e-01    1.000000e+00\n",
      "country[T.Poland]     0.163893   2.291945  0.612889  4.669245e-01    1.000000e+00\n",
      "country[T.Romania]    3.964865  23.433919  9.639104  5.761225e-07    1.607382e-04\n",
      "country[T.Russia]     0.472958   1.120508  0.727979  1.490596e-01    1.000000e+00\n",
      "country[T.Serbia]     0.817541   2.904771  1.541029  1.811931e-01    1.000000e+00\n",
      "country[T.UK]         0.457504   1.278603  0.764831  3.065103e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.842085   1.649153  1.178443  3.382713e-01    1.000000e+00\n",
      "stage                 1.146334   1.527650  1.323328  1.311942e-04    3.660317e-02\n",
      "age_group             2.401199   3.377895  2.847981  2.750962e-33    7.675183e-31\n",
      "Running logistic regression with parameter stage, signature DBS_burden\n",
      "Zero counts for signature DBS_burden: 468\n",
      "All counts for signature DBS_burden: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             929.7555  \n",
      "Link Function:         Logit             BIC:             -4145.8063\n",
      "Dependent Variable:    DBS_burden_bool   Log-Likelihood:  -450.88   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -535.62   \n",
      "No. Observations:      773               Deviance:        901.76    \n",
      "Df Model:              13                Pearson chi2:    765.      \n",
      "Df Residuals:          759               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.6954   0.2734 -6.2005 0.0000 -2.2313 -1.1595\n",
      "sex[T.Male]           0.5109   0.1767  2.8908 0.0038  0.1645  0.8574\n",
      "country[T.Brazil]    -0.1676   0.3333 -0.5028 0.6151 -0.8209  0.4857\n",
      "country[T.Canada]    -1.1838   0.4172 -2.8372 0.0046 -2.0015 -0.3660\n",
      "country[T.Japan]     -1.6971   0.4314 -3.9342 0.0001 -2.5426 -0.8516\n",
      "country[T.Lithuania]  0.4533   0.6566  0.6904 0.4900 -0.8337  1.7403\n",
      "country[T.Poland]    -0.3063   0.6439 -0.4757 0.6343 -1.5682  0.9557\n",
      "country[T.Romania]    1.3906   0.4110  3.3833 0.0007  0.5850  2.1961\n",
      "country[T.Russia]    -0.2233   0.2203 -1.0135 0.3108 -0.6551  0.2085\n",
      "country[T.Serbia]     0.3439   0.3075  1.1184 0.2634 -0.2588  0.9465\n",
      "country[T.UK]         0.0480   0.2803  0.1714 0.8639 -0.5013  0.5973\n",
      "tobacco_ever[T.Yes]   0.0960   0.1716  0.5598 0.5756 -0.2402  0.4323\n",
      "stage                -0.1098   0.0729 -1.5052 0.1323 -0.2528  0.0332\n",
      "age_group             0.8120   0.0826  9.8325 0.0000  0.6502  0.9739\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.107384  0.313641  0.183521  5.627712e-10    1.570132e-07\n",
      "sex[T.Male]           1.178829  2.356943  1.666863  3.842807e-03    1.000000e+00\n",
      "country[T.Brazil]     0.440022  1.625380  0.845697  6.151245e-01    1.000000e+00\n",
      "country[T.Canada]     0.135127  0.693506  0.306123  4.551808e-03    1.000000e+00\n",
      "country[T.Japan]      0.078665  0.426718  0.183215  8.346454e-05    2.328661e-02\n",
      "country[T.Lithuania]  0.434454  5.699087  1.573528  4.899643e-01    1.000000e+00\n",
      "country[T.Poland]     0.208423  2.600401  0.736194  6.343096e-01    1.000000e+00\n",
      "country[T.Romania]    1.794986  8.990199  4.017123  7.162754e-04    1.998408e-01\n",
      "country[T.Russia]     0.519374  1.231884  0.799880  3.108444e-01    1.000000e+00\n",
      "country[T.Serbia]     0.772013  2.576565  1.410369  2.634087e-01    1.000000e+00\n",
      "country[T.UK]         0.605772  1.817264  1.049213  8.638960e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.786457  1.540823  1.100814  5.755908e-01    1.000000e+00\n",
      "stage                 0.776658  1.033730  0.896022  1.322801e-01    1.000000e+00\n",
      "age_group             1.915879  2.648296  2.252513  8.158333e-23    2.276175e-20\n",
      "Running logistic regression with parameter stage, signature ID_burden\n",
      "Zero counts for signature ID_burden: 415\n",
      "All counts for signature ID_burden: 863\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1005.0978 \n",
      "Link Function:         Logit             BIC:             -4762.4943\n",
      "Dependent Variable:    ID_burden_bool    Log-Likelihood:  -488.55   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -597.55   \n",
      "No. Observations:      863               Deviance:        977.10    \n",
      "Df Model:              13                Pearson chi2:    864.      \n",
      "Df Residuals:          849               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.6618   0.2657 -6.2547 0.0000 -2.1826 -1.1411\n",
      "sex[T.Male]           0.5296   0.1700  3.1145 0.0018  0.1963  0.8629\n",
      "country[T.Brazil]    -0.3907   0.3367 -1.1605 0.2458 -1.0506  0.2692\n",
      "country[T.Canada]    -1.0680   0.4106 -2.6009 0.0093 -1.8728 -0.2632\n",
      "country[T.Japan]     -2.6578   0.4869 -5.4583 0.0000 -3.6122 -1.7034\n",
      "country[T.Lithuania] -0.5946   0.6206 -0.9581 0.3380 -1.8110  0.6218\n",
      "country[T.Poland]    -0.9015   0.6792 -1.3272 0.1844 -2.2328  0.4298\n",
      "country[T.Romania]    0.7825   0.3540  2.2104 0.0271  0.0887  1.4763\n",
      "country[T.Russia]    -0.3418   0.2132 -1.6034 0.1088 -0.7596  0.0760\n",
      "country[T.Serbia]    -0.1265   0.3108 -0.4069 0.6841 -0.7357  0.4827\n",
      "country[T.UK]        -0.2833   0.2542 -1.1144 0.2651 -0.7815  0.2150\n",
      "tobacco_ever[T.Yes]  -0.1596   0.1644 -0.9707 0.3317 -0.4818  0.1626\n",
      "stage                -0.0401   0.0702 -0.5709 0.5680 -0.1778  0.0976\n",
      "age_group             0.9246   0.0820 11.2776 0.0000  0.7639  1.0853\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.112753  0.319479  0.189796  3.983788e-10    1.111477e-07\n",
      "sex[T.Male]           1.216914  2.369932  1.698235  1.842527e-03    5.140652e-01\n",
      "country[T.Brazil]     0.349739  1.308858  0.676579  2.458447e-01    1.000000e+00\n",
      "country[T.Canada]     0.153688  0.768600  0.343693  9.298260e-03    1.000000e+00\n",
      "country[T.Japan]      0.026993  0.182057  0.070102  4.808027e-08    1.341439e-05\n",
      "country[T.Lithuania]  0.163493  1.862237  0.551782  3.380186e-01    1.000000e+00\n",
      "country[T.Poland]     0.107233  1.536915  0.405965  1.844353e-01    1.000000e+00\n",
      "country[T.Romania]    1.092708  4.376658  2.186872  2.707620e-02    1.000000e+00\n",
      "country[T.Russia]     0.467870  1.078969  0.710505  1.088472e-01    1.000000e+00\n",
      "country[T.Serbia]     0.479156  1.620525  0.881183  6.840630e-01    1.000000e+00\n",
      "country[T.UK]         0.457703  1.239808  0.753302  2.651147e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.617644  1.176612  0.852483  3.316748e-01    1.000000e+00\n",
      "stage                 0.837148  1.102475  0.960695  5.680480e-01    1.000000e+00\n",
      "age_group             2.146647  2.960274  2.520846  1.692501e-29    4.722078e-27\n",
      "Using below/above median model for signature SBS1, its frequency is 0.77\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using below/above median model for signature SBS1536A, its frequency is 0.86\n",
      "Using below/above median model for signature SBS1536B, its frequency is 0.90\n",
      "Using below/above median model for signature ID1, its frequency is 0.85\n",
      "Using below/above median model for signature ID5, its frequency is 0.93\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS1\n",
      "Zero counts for signature SBS1: 481\n",
      "All counts for signature SBS1: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1261.1332 \n",
      "Link Function:         Logit             BIC:             -5270.8385\n",
      "Dependent Variable:    SBS1_bool         Log-Likelihood:  -616.57   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1233.1    \n",
      "Df Model:              13                Pearson chi2:    959.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.9710   0.2209 -4.3948 0.0000 -1.4040 -0.5380\n",
      "tobacco_ever[T.Yes]  -0.0132   0.1443 -0.0918 0.9269 -0.2961  0.2696\n",
      "sex[T.Male]          -0.0397   0.1451 -0.2732 0.7847 -0.3241  0.2448\n",
      "country[T.Brazil]     0.1257   0.2476  0.5080 0.6115 -0.3595  0.6110\n",
      "country[T.Canada]     0.7523   0.2857  2.6332 0.0085  0.1923  1.3123\n",
      "country[T.Japan]      0.2846   0.3737  0.7615 0.4463 -0.4478  1.0170\n",
      "country[T.Lithuania]  0.4754   0.5427  0.8759 0.3811 -0.5883  1.5391\n",
      "country[T.Poland]    -0.5277   0.6304 -0.8371 0.4025 -1.7633  0.7078\n",
      "country[T.Romania]   -1.0465   0.3173 -3.2978 0.0010 -1.6684 -0.4245\n",
      "country[T.Russia]     0.4980   0.1954  2.5491 0.0108  0.1151  0.8809\n",
      "country[T.Serbia]    -0.7507   0.3071 -2.4444 0.0145 -1.3526 -0.1488\n",
      "country[T.Thailand]   0.1631   0.9369  0.1741 0.8618 -1.6733  1.9995\n",
      "country[T.UK]         0.4062   0.2338  1.7370 0.0824 -0.0521  0.8645\n",
      "age_group             0.4416   0.0635  6.9582 0.0000  0.3172  0.5660\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.245601  0.583940  0.378703  1.108950e-05    3.093971e-03\n",
      "tobacco_ever[T.Yes]   0.743735  1.309426  0.986846  9.268897e-01    1.000000e+00\n",
      "sex[T.Male]           0.723148  1.277402  0.961120  7.846898e-01    1.000000e+00\n",
      "country[T.Brazil]     0.698058  1.842186  1.133998  6.114804e-01    1.000000e+00\n",
      "country[T.Canada]     1.212095  3.714866  2.121973  8.458929e-03    1.000000e+00\n",
      "country[T.Japan]      0.639006  2.764914  1.329209  4.463316e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.555246  4.660370  1.608617  3.810815e-01    1.000000e+00\n",
      "country[T.Poland]     0.171483  2.029557  0.589945  4.025131e-01    1.000000e+00\n",
      "country[T.Romania]    0.188550  0.654086  0.351180  9.745417e-04    2.718971e-01\n",
      "country[T.Russia]     1.121984  2.413139  1.645449  1.080012e-02    1.000000e+00\n",
      "country[T.Serbia]     0.258574  0.861775  0.472052  1.451066e-02    1.000000e+00\n",
      "country[T.Thailand]   0.187634  7.385229  1.177165  8.617973e-01    1.000000e+00\n",
      "country[T.UK]         0.949202  2.373826  1.501080  8.238373e-02    1.000000e+00\n",
      "age_group             1.373265  1.761123  1.555149  3.445975e-12    9.614271e-10\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS2\n",
      "** Warning: Covariate country, sig SBS2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS2: 955\n",
      "All counts for signature SBS2: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                          2.5%       97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.191248    4.822484  0.945485  5.885034e-01        1.000000\n",
      "sex[T.Male]           0.252354    7.165139  1.202918  5.531925e-01        1.000000\n",
      "country[T.Brazil]     0.003978    6.902463  0.551651  4.723395e-01        1.000000\n",
      "country[T.Canada]     0.004983    8.790606  0.693510  7.830516e-01        1.000000\n",
      "country[T.Japan]      0.009643   17.752792  1.357391  4.888245e-01        1.000000\n",
      "country[T.Lithuania]  0.022258   41.083299  3.130414  3.530202e-01        1.000000\n",
      "country[T.Poland]     0.027188   52.420407  3.855471  3.105754e-01        1.000000\n",
      "country[T.Romania]    0.005770    9.978882  0.799465  5.391144e-01        1.000000\n",
      "country[T.Russia]     0.177544    8.351672  1.213727  5.387321e-01        1.000000\n",
      "country[T.Serbia]     0.005567    9.879966  0.776013  5.012654e-01        1.000000\n",
      "country[T.Thailand]   0.060409  148.684751  9.072362  2.066667e-01        1.000000\n",
      "country[T.UK]         0.343534   14.921274  2.265973  2.574806e-01        1.000000\n",
      "age_group             0.505794    2.116040  1.019687  5.552563e-01        1.000000\n",
      "Intercept             0.000641    0.075838  0.009654  3.157223e-08        0.000009\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS4\n",
      "Zero counts for signature SBS4: 410\n",
      "All counts for signature SBS4: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1253.1215 \n",
      "Link Function:         Logit             BIC:             -5278.8502\n",
      "Dependent Variable:    SBS4_bool         Log-Likelihood:  -612.56   \n",
      "Date:                  2024-02-08 17:22  LL-Null:         -655.73   \n",
      "No. Observations:      961               Deviance:        1225.1    \n",
      "Df Model:              13                Pearson chi2:    956.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.2898   0.2182 -1.3279 0.1842 -0.7174  0.1379\n",
      "tobacco_ever[T.Yes]   0.6616   0.1453  4.5541 0.0000  0.3769  0.9463\n",
      "sex[T.Male]          -0.2000   0.1457 -1.3726 0.1699 -0.4855  0.0856\n",
      "country[T.Brazil]    -0.6365   0.2512 -2.5340 0.0113 -1.1288 -0.1442\n",
      "country[T.Canada]    -0.5623   0.2772 -2.0285 0.0425 -1.1056 -0.0190\n",
      "country[T.Japan]      0.8305   0.4738  1.7530 0.0796 -0.0980  1.7590\n",
      "country[T.Lithuania] -0.1917   0.5429 -0.3531 0.7240 -1.2558  0.8724\n",
      "country[T.Poland]     0.7214   0.6908  1.0443 0.2964 -0.6326  2.0754\n",
      "country[T.Romania]   -0.9480   0.2913 -3.2545 0.0011 -1.5190 -0.3771\n",
      "country[T.Russia]    -0.3595   0.1957 -1.8364 0.0663 -0.7431  0.0242\n",
      "country[T.Serbia]    -0.5464   0.2831 -1.9302 0.0536 -1.1013  0.0084\n",
      "country[T.Thailand]  -0.3463   0.9311 -0.3719 0.7099 -2.1711  1.4786\n",
      "country[T.UK]        -0.0538   0.2434 -0.2211 0.8250 -0.5308  0.4232\n",
      "age_group             0.3406   0.0626  5.4412 0.0000  0.2179  0.4633\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.488003  1.147892  0.748448  1.842192e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.457702  2.576233  1.937881  5.261225e-06        0.001468\n",
      "sex[T.Male]           0.615366  1.089350  0.818748  1.698855e-01        1.000000\n",
      "country[T.Brazil]     0.323418  0.865727  0.529142  1.127735e-02        1.000000\n",
      "country[T.Canada]     0.331017  0.981188  0.569904  4.251165e-02        1.000000\n",
      "country[T.Japan]      0.906620  5.806870  2.294477  7.959572e-02        1.000000\n",
      "country[T.Lithuania]  0.284835  2.392621  0.825532  7.239855e-01        1.000000\n",
      "country[T.Poland]     0.531202  7.968059  2.057341  2.963683e-01        1.000000\n",
      "country[T.Romania]    0.218935  0.685851  0.387501  1.136001e-03        0.316944\n",
      "country[T.Russia]     0.475620  1.024477  0.698042  6.629485e-02        1.000000\n",
      "country[T.Serbia]     0.332439  1.008465  0.579010  5.358327e-02        1.000000\n",
      "country[T.Thailand]   0.114050  4.386586  0.707313  7.099492e-01        1.000000\n",
      "country[T.UK]         0.588115  1.526834  0.947604  8.249923e-01        1.000000\n",
      "age_group             1.243511  1.589371  1.405845  5.291094e-08        0.000015\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS5\n",
      "Zero counts for signature SBS5: 884\n",
      "All counts for signature SBS5: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             510.2984  \n",
      "Link Function:         Logit             BIC:             -6021.6734\n",
      "Dependent Variable:    SBS5_bool         Log-Likelihood:  -241.15   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -268.19   \n",
      "No. Observations:      961               Deviance:        482.30    \n",
      "Df Model:              13                Pearson chi2:    941.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -3.6946   0.4520 -8.1732 0.0000 -4.5806 -2.8086\n",
      "tobacco_ever[T.Yes]   0.4243   0.2694  1.5750 0.1153 -0.1037  0.9523\n",
      "sex[T.Male]          -0.1023   0.2673 -0.3829 0.7018 -0.6261  0.4215\n",
      "country[T.Brazil]     0.2327   0.3786  0.6146 0.5388 -0.5093  0.9747\n",
      "country[T.Canada]    -0.6398   0.5130 -1.2470 0.2124 -1.6453  0.3658\n",
      "country[T.Japan]      0.1984   0.5090  0.3897 0.6968 -0.7993  1.1960\n",
      "country[T.Lithuania]  0.5316   0.6869  0.7739 0.4390 -0.8148  1.8780\n",
      "country[T.Poland]    -0.2157   1.0868 -0.1985 0.8427 -2.3458  1.9143\n",
      "country[T.Romania]   -1.4158   0.7513 -1.8843 0.0595 -2.8884  0.0568\n",
      "country[T.Russia]    -1.1240   0.4690 -2.3966 0.0165 -2.0432 -0.2048\n",
      "country[T.Serbia]    -0.4719   0.5621 -0.8395 0.4012 -1.5736  0.6298\n",
      "country[T.Thailand]   0.4677   1.1586  0.4036 0.6865 -1.8032  2.7385\n",
      "country[T.UK]        -0.6812   0.4275 -1.5932 0.1111 -1.5191  0.1568\n",
      "age_group             0.6168   0.1244  4.9594 0.0000  0.3730  0.8606\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.010249   0.060289  0.024858  3.003636e-16    8.380144e-14\n",
      "tobacco_ever[T.Yes]   0.901474   2.591741  1.528524  1.152672e-01    1.000000e+00\n",
      "sex[T.Male]           0.534654   1.524229  0.902738  7.018188e-01    1.000000e+00\n",
      "country[T.Brazil]     0.600891   2.650256  1.261949  5.388485e-01    1.000000e+00\n",
      "country[T.Canada]     0.192948   1.441634  0.527409  2.123920e-01    1.000000e+00\n",
      "country[T.Japan]      0.449657   3.306817  1.219399  6.967587e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.442738   6.540428  1.701674  4.390034e-01    1.000000e+00\n",
      "country[T.Poland]     0.095771   6.782426  0.805954  8.426530e-01    1.000000e+00\n",
      "country[T.Romania]    0.055667   1.058494  0.242742  5.952396e-02    1.000000e+00\n",
      "country[T.Russia]     0.129612   0.814824  0.324979  1.654771e-02    1.000000e+00\n",
      "country[T.Serbia]     0.207306   1.877208  0.623824  4.011752e-01    1.000000e+00\n",
      "country[T.Thailand]   0.164774  15.463515  1.596243  6.864825e-01    1.000000e+00\n",
      "country[T.UK]         0.218902   1.169773  0.506030  1.111166e-01    1.000000e+00\n",
      "age_group             1.452148   2.364508  1.853002  7.070953e-07    1.972796e-04\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS12\n",
      "** Warning: Covariate country, sig SBS12, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS12: 916\n",
      "All counts for signature SBS12: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]    0.309222    1.655433    0.723576  4.287125e-01    1.000000e+00\n",
      "sex[T.Male]            0.465071    2.524265    1.070095  8.164948e-01    1.000000e+00\n",
      "country[T.Brazil]      0.494468    9.464414    2.254001  2.599147e-01    1.000000e+00\n",
      "country[T.Canada]      1.311769   18.515799    4.800254  1.869017e-02    1.000000e+00\n",
      "country[T.Japan]      46.327813  528.814534  140.847779  1.974007e-24    5.507481e-22\n",
      "country[T.Lithuania]   0.011910   16.231686    1.607199  6.403008e-01    1.000000e+00\n",
      "country[T.Poland]      0.018979   26.673070    2.573986  5.236235e-01    1.000000e+00\n",
      "country[T.Romania]     0.370223   10.016911    2.165140  3.358430e-01    1.000000e+00\n",
      "country[T.Russia]      0.122448    3.327164    0.717194  6.234463e-01    1.000000e+00\n",
      "country[T.Serbia]      0.135009    7.588342    1.366899  6.606946e-01    1.000000e+00\n",
      "country[T.Thailand]    0.029118   49.255718    4.096510  4.086398e-01    1.000000e+00\n",
      "country[T.UK]          0.220563    5.914847    1.284845  6.394585e-01    1.000000e+00\n",
      "age_group              0.888536    1.747332    1.238523  2.009453e-01    1.000000e+00\n",
      "Intercept              0.002885    0.044899    0.012657  2.796198e-16    7.801392e-14\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS13\n",
      "** Warning: Covariate country, sig SBS13, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS13: 807\n",
      "All counts for signature SBS13: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.836646  1.749752  1.208187  3.109094e-01    1.000000e+00\n",
      "sex[T.Male]           1.127287  2.462790  1.655735  9.779735e-03    1.000000e+00\n",
      "country[T.Brazil]     0.418998  1.593755  0.839764  5.928268e-01    1.000000e+00\n",
      "country[T.Canada]     0.392739  1.650584  0.832803  6.101081e-01    1.000000e+00\n",
      "country[T.Japan]      0.115561  1.246694  0.442444  1.294436e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.343460  4.304409  1.381843  6.027506e-01    1.000000e+00\n",
      "country[T.Poland]     0.001402  1.417840  0.180827  1.215852e-01    1.000000e+00\n",
      "country[T.Romania]    0.039471  0.602227  0.195700  2.400098e-03    6.696274e-01\n",
      "country[T.Russia]     0.945758  2.429837  1.513600  8.343357e-02    1.000000e+00\n",
      "country[T.Serbia]     0.311937  1.552226  0.732883  4.239265e-01    1.000000e+00\n",
      "country[T.Thailand]   0.004099  5.088425  0.545066  6.456467e-01    1.000000e+00\n",
      "country[T.UK]         0.675298  2.080448  1.196862  5.188083e-01    1.000000e+00\n",
      "age_group             0.912975  1.257242  1.070513  3.967936e-01    1.000000e+00\n",
      "Intercept             0.063442  0.205292  0.115804  5.122332e-15    1.429131e-12\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS18\n",
      "** Warning: Covariate country, sig SBS18, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS18: 890\n",
      "All counts for signature SBS18: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.453154   1.236685  0.750254  2.528026e-01    1.000000e+00\n",
      "sex[T.Male]           1.032579   3.080587  1.753911  3.640008e-02    1.000000e+00\n",
      "country[T.Brazil]     0.394458   2.113555  0.953939  8.081706e-01    1.000000e+00\n",
      "country[T.Canada]     0.206497   1.769045  0.673768  4.438505e-01    1.000000e+00\n",
      "country[T.Japan]      0.151010   2.572015  0.775389  6.484069e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.375509   7.239441  2.005130  3.542966e-01    1.000000e+00\n",
      "country[T.Poland]     0.119365   5.167568  1.139520  7.646379e-01    1.000000e+00\n",
      "country[T.Romania]    0.327200   2.382710  0.958571  8.123949e-01    1.000000e+00\n",
      "country[T.Russia]     0.294839   1.231669  0.613787  1.660127e-01    1.000000e+00\n",
      "country[T.Serbia]     0.282644   2.070118  0.830477  6.457819e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009301  11.929475  1.243455  7.744472e-01    1.000000e+00\n",
      "country[T.UK]         0.350881   1.850024  0.842125  6.241525e-01    1.000000e+00\n",
      "age_group             0.668749   1.027963  0.829600  8.462689e-02    1.000000e+00\n",
      "Intercept             0.052823   0.235230  0.114568  3.080824e-10    8.595499e-08\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS21\n",
      "** Warning: Covariate country, sig SBS21, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS21: 956\n",
      "All counts for signature SBS21: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.256798      8.344817   1.351663  5.229939e-01        1.000000\n",
      "sex[T.Male]           0.171399      5.234059   0.884011  5.164416e-01        1.000000\n",
      "country[T.Brazil]     0.013726    474.702256   2.552817  4.552350e-01        1.000000\n",
      "country[T.Canada]     0.017881    621.014926   3.332013  5.462051e-01        1.000000\n",
      "country[T.Japan]      0.039746   1393.834621   7.442002  2.536414e-01        1.000000\n",
      "country[T.Lithuania]  0.090397   3227.197637  17.071964  1.699111e-01        1.000000\n",
      "country[T.Poland]     0.077872   2797.072690  14.762668  1.301963e-01        1.000000\n",
      "country[T.Romania]    1.720567   3002.094610  21.612695  1.291650e-02        1.000000\n",
      "country[T.Russia]     0.159592    456.700551   3.087434  3.250239e-01        1.000000\n",
      "country[T.Serbia]     0.017082    596.315789   3.191987  3.503634e-01        1.000000\n",
      "country[T.Thailand]   0.295495  12443.897451  60.439229  7.158695e-02        1.000000\n",
      "country[T.UK]         0.929098   1625.646851  11.703370  4.658703e-02        1.000000\n",
      "age_group             0.316170      1.486701   0.695730  2.720115e-01        1.000000\n",
      "Intercept             0.000025      0.054072   0.004006  1.798485e-08        0.000005\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS22\n",
      "** Warning: Covariate country, sig SBS22, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS22: 890\n",
      "All counts for signature SBS22: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                            2.5%        97.5%          OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]     0.456179     2.069096    0.966920  7.453117e-01    1.000000e+00\n",
      "sex[T.Male]             0.288149     1.254628    0.605967  1.688922e-01    1.000000e+00\n",
      "country[T.Brazil]       1.161096    76.379659    7.267801  3.110819e-02    1.000000e+00\n",
      "country[T.Canada]       0.844084    73.194481    6.543899  7.031701e-02    1.000000e+00\n",
      "country[T.Japan]        0.014619    42.311241    2.168091  5.121666e-01    1.000000e+00\n",
      "country[T.Lithuania]    0.027413    82.647569    4.115478  3.546407e-01    1.000000e+00\n",
      "country[T.Poland]       0.061841   195.494053    9.407364  2.296384e-01    1.000000e+00\n",
      "country[T.Romania]    128.038297  5176.267656  547.352269  6.104767e-40    1.703230e-37\n",
      "country[T.Russia]       0.003760    10.545667    0.552633  6.128813e-01    1.000000e+00\n",
      "country[T.Serbia]      18.195928   737.528686   78.406690  8.170917e-13    2.279686e-10\n",
      "country[T.Thailand]    18.368925  2532.473535  169.445899  9.369705e-06    2.614148e-03\n",
      "country[T.UK]           0.174731    27.511843    2.192677  4.097025e-01    1.000000e+00\n",
      "age_group               1.418144     2.808220    1.963569  2.440469e-05    6.808909e-03\n",
      "Intercept               0.000132     0.007934    0.001449  1.971002e-27    5.499097e-25\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS44\n",
      "** Warning: Covariate country, sig SBS44, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS44: 955\n",
      "All counts for signature SBS44: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.388738     10.214761   1.781457  3.630651e-01    1.000000e+00\n",
      "sex[T.Male]           0.245220      6.202517   1.106275  5.186582e-01    1.000000e+00\n",
      "country[T.Brazil]     0.435796   1223.163842   8.319516  1.234115e-01    1.000000e+00\n",
      "country[T.Canada]     0.016995    587.167274   3.158867  5.678281e-01    1.000000e+00\n",
      "country[T.Japan]      0.036531   1277.975854   6.832235  2.586105e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.093085   3313.692073  17.552693  1.670946e-01    1.000000e+00\n",
      "country[T.Poland]     0.079940   2863.346515  15.132694  1.319203e-01    1.000000e+00\n",
      "country[T.Romania]    1.792936   3129.350824  22.524720  1.191408e-02    1.000000e+00\n",
      "country[T.Russia]     0.175472    500.611598   3.388023  3.078604e-01    1.000000e+00\n",
      "country[T.Serbia]     0.018077    630.899722   3.377359  3.477139e-01    1.000000e+00\n",
      "country[T.Thailand]   0.333037  13683.059176  67.110527  6.766498e-02    1.000000e+00\n",
      "country[T.UK]         0.906548   1583.909587  11.404388  4.740363e-02    1.000000e+00\n",
      "age_group             0.365681      1.498940   0.747648  3.118490e-01    1.000000e+00\n",
      "Intercept             0.000016      0.034502   0.002519  4.743909e-10    1.323550e-07\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS1536A\n",
      "** Warning: Covariate country, sig SBS1536A, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536A: 481\n",
      "All counts for signature SBS1536A: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.813475  1.474020  1.094876  5.481547e-01    1.000000e+00\n",
      "sex[T.Male]           1.345606  2.466747  1.818567  9.359018e-05    2.611166e-02\n",
      "country[T.Brazil]     0.179310  0.508878  0.303622  5.385036e-06    1.502425e-03\n",
      "country[T.Canada]     0.154877  0.489634  0.276796  9.713841e-06    2.710162e-03\n",
      "country[T.Japan]      0.026821  0.157798  0.067983  4.294739e-11    1.198232e-08\n",
      "country[T.Lithuania]  0.379517  4.021723  1.137340  8.111542e-01    1.000000e+00\n",
      "country[T.Poland]     0.304948  3.450321  0.990095  9.401667e-01    1.000000e+00\n",
      "country[T.Romania]    0.207614  0.694228  0.380429  1.676365e-03    4.677058e-01\n",
      "country[T.Russia]     0.207702  0.470869  0.313787  1.610557e-08    4.493454e-06\n",
      "country[T.Serbia]     0.277056  0.890186  0.497588  1.872282e-02    1.000000e+00\n",
      "country[T.Thailand]   0.000200  0.255256  0.026803  5.399007e-04    1.506323e-01\n",
      "country[T.UK]         0.274757  0.726662  0.447403  1.154794e-03    3.221876e-01\n",
      "age_group             1.762183  2.326847  2.019679  7.781287e-27    2.170979e-24\n",
      "Intercept             0.233780  0.584918  0.371289  1.724324e-05    4.810865e-03\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS1536B\n",
      "Zero counts for signature SBS1536B: 481\n",
      "All counts for signature SBS1536B: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1266.0601 \n",
      "Link Function:         Logit             BIC:             -5265.9117\n",
      "Dependent Variable:    SBS1536B_bool     Log-Likelihood:  -619.03   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1238.1    \n",
      "Df Model:              13                Pearson chi2:    961.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.1719   0.2241 -5.2287 0.0000 -1.6112 -0.7326\n",
      "tobacco_ever[T.Yes]   0.3441   0.1447  2.3781 0.0174  0.0605  0.6277\n",
      "sex[T.Male]          -0.0747   0.1449 -0.5154 0.6063 -0.3586  0.2093\n",
      "country[T.Brazil]    -0.6812   0.2610 -2.6097 0.0091 -1.1928 -0.1696\n",
      "country[T.Canada]    -0.5143   0.2802 -1.8359 0.0664 -1.0634  0.0348\n",
      "country[T.Japan]     -0.4021   0.3740 -1.0751 0.2823 -1.1352  0.3310\n",
      "country[T.Lithuania] -0.1763   0.5296 -0.3330 0.7391 -1.2143  0.8616\n",
      "country[T.Poland]    -0.3266   0.6127 -0.5331 0.5940 -1.5275  0.8742\n",
      "country[T.Romania]    0.1992   0.2921  0.6821 0.4952 -0.3732  0.7717\n",
      "country[T.Russia]     0.4117   0.1968  2.0915 0.0365  0.0259  0.7974\n",
      "country[T.Serbia]     0.1821   0.2860  0.6366 0.5244 -0.3784  0.7426\n",
      "country[T.Thailand]  -0.7464   0.9380 -0.7957 0.4262 -2.5848  1.0921\n",
      "country[T.UK]         0.2838   0.2371  1.1970 0.2313 -0.1809  0.7484\n",
      "age_group             0.5173   0.0644  8.0272 0.0000  0.3910  0.6436\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.199651  0.480651  0.309778  1.707273e-07    4.763291e-05\n",
      "tobacco_ever[T.Yes]   1.062375  1.873292  1.410723  1.740030e-02    1.000000e+00\n",
      "sex[T.Male]           0.698665  1.232758  0.928055  6.062520e-01    1.000000e+00\n",
      "country[T.Brazil]     0.303357  0.843996  0.505996  9.061854e-03    1.000000e+00\n",
      "country[T.Canada]     0.345266  1.035380  0.597898  6.637794e-02    1.000000e+00\n",
      "country[T.Japan]      0.321356  1.392304  0.668899  2.823246e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.296918  2.366952  0.838327  7.391365e-01    1.000000e+00\n",
      "country[T.Poland]     0.217077  2.397014  0.721344  5.939527e-01    1.000000e+00\n",
      "country[T.Romania]    0.688498  2.163458  1.220466  4.951727e-01    1.000000e+00\n",
      "country[T.Russia]     1.026218  2.219810  1.509308  3.648736e-02    1.000000e+00\n",
      "country[T.Serbia]     0.684922  2.101322  1.199684  5.243755e-01    1.000000e+00\n",
      "country[T.Thailand]   0.075414  2.980403  0.474092  4.262062e-01    1.000000e+00\n",
      "country[T.UK]         0.834529  2.113692  1.328133  2.313177e-01    1.000000e+00\n",
      "age_group             1.478438  1.903310  1.677476  9.972505e-16    2.782329e-13\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS1536F\n",
      "** Warning: Covariate country, sig SBS1536F, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536F: 845\n",
      "All counts for signature SBS1536F: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.483592   1.114048  0.735575  1.466068e-01    1.000000e+00\n",
      "sex[T.Male]           0.650125   1.488847  0.980584  9.055966e-01    1.000000e+00\n",
      "country[T.Brazil]     0.730990   3.070220  1.525799  2.509571e-01    1.000000e+00\n",
      "country[T.Canada]     1.451735   5.859918  2.941631  3.144730e-03    8.773797e-01\n",
      "country[T.Japan]      0.601385   4.635639  1.801791  2.691022e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.598493   7.838257  2.448556  1.899747e-01    1.000000e+00\n",
      "country[T.Poland]     0.002879   2.985384  0.372925  4.248485e-01    1.000000e+00\n",
      "country[T.Romania]    0.074851   1.200943  0.376888  1.052973e-01    1.000000e+00\n",
      "country[T.Russia]     0.910607   2.859994  1.604777  1.013273e-01    1.000000e+00\n",
      "country[T.Serbia]     0.280697   2.018363  0.818527  6.669293e-01    1.000000e+00\n",
      "country[T.Thailand]   0.283356  16.955946  2.935053  3.082063e-01    1.000000e+00\n",
      "country[T.UK]         0.770335   3.030079  1.547576  2.126240e-01    1.000000e+00\n",
      "age_group             0.820592   1.166748  0.977949  7.909412e-01    1.000000e+00\n",
      "Intercept             0.066269   0.240694  0.128793  9.824710e-12    2.741094e-09\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS1536I\n",
      "** Warning: Covariate country, sig SBS1536I, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536I: 866\n",
      "All counts for signature SBS1536I: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]    0.578338    2.291137    1.144543  4.959587e-01    1.000000e+00\n",
      "sex[T.Male]            0.455980    1.761877    0.896424  5.292190e-01    1.000000e+00\n",
      "country[T.Brazil]      0.449911    7.811765    2.002186  2.600296e-01    1.000000e+00\n",
      "country[T.Canada]      0.002327    2.808592    0.307878  3.516342e-01    1.000000e+00\n",
      "country[T.Japan]       0.148192    8.165945    1.507134  4.656675e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.385815   23.051682    4.034247  1.569276e-01    1.000000e+00\n",
      "country[T.Poland]      0.017815   25.360794    2.431331  4.287710e-01    1.000000e+00\n",
      "country[T.Romania]    72.011661  655.727301  198.557016  3.093870e-39    8.631898e-37\n",
      "country[T.Russia]      0.050174    2.577508    0.497705  3.258198e-01    1.000000e+00\n",
      "country[T.Serbia]     29.923433  257.454199   80.465260  7.883507e-25    2.199498e-22\n",
      "country[T.Thailand]    7.987582  412.561786   53.436827  9.145558e-05    2.551611e-02\n",
      "country[T.UK]          0.001373    1.647099    0.181438  1.155112e-01    1.000000e+00\n",
      "age_group              1.568936    2.968918    2.128238  2.988296e-07    8.337345e-05\n",
      "Intercept              0.000745    0.011855    0.003273  1.557038e-29    4.344136e-27\n",
      "Running logistic regression with parameter tobacco_ever, signature DBS2\n",
      "Zero counts for signature DBS2: 560\n",
      "All counts for signature DBS2: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1267.8276 \n",
      "Link Function:         Logit             BIC:             -5264.1441\n",
      "Dependent Variable:    DBS2_bool         Log-Likelihood:  -619.91   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -652.90   \n",
      "No. Observations:      961               Deviance:        1239.8    \n",
      "Df Model:              13                Pearson chi2:    961.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.2612   0.2242 -5.6261 0.0000 -1.7006 -0.8219\n",
      "tobacco_ever[T.Yes]   0.7451   0.1443  5.1644 0.0000  0.4623  1.0279\n",
      "sex[T.Male]           0.3185   0.1454  2.1901 0.0285  0.0335  0.6035\n",
      "country[T.Brazil]    -0.2595   0.2532 -1.0249 0.3054 -0.7559  0.2368\n",
      "country[T.Canada]    -0.4443   0.2776 -1.6003 0.1095 -0.9885  0.0999\n",
      "country[T.Japan]     -0.9747   0.3951 -2.4670 0.0136 -1.7492 -0.2003\n",
      "country[T.Lithuania] -0.0472   0.5301 -0.0891 0.9290 -1.0863  0.9918\n",
      "country[T.Poland]    -0.3297   0.6008 -0.5488 0.5831 -1.5073  0.8478\n",
      "country[T.Romania]    0.4115   0.2888  1.4249 0.1542 -0.1545  0.9776\n",
      "country[T.Russia]    -0.2759   0.1987 -1.3882 0.1651 -0.6653  0.1136\n",
      "country[T.Serbia]     0.3193   0.2810  1.1362 0.2559 -0.2315  0.8701\n",
      "country[T.Thailand]  -0.0545   0.9395 -0.0580 0.9538 -1.8959  1.7870\n",
      "country[T.UK]        -0.2359   0.2332 -1.0114 0.3118 -0.6931  0.2212\n",
      "age_group             0.2456   0.0622  3.9511 0.0001  0.1238  0.3674\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.182571  0.439613  0.283303  1.843297e-08        0.000005\n",
      "tobacco_ever[T.Yes]   1.587752  2.795076  2.106630  2.411644e-07        0.000067\n",
      "sex[T.Male]           1.034029  1.828499  1.375035  2.851870e-02        1.000000\n",
      "country[T.Brazil]     0.469585  1.267200  0.771400  3.054220e-01        1.000000\n",
      "country[T.Canada]     0.372131  1.105013  0.641256  1.095291e-01        1.000000\n",
      "country[T.Japan]      0.173921  0.818467  0.377291  1.362659e-02        1.000000\n",
      "country[T.Lithuania]  0.337469  2.696150  0.953870  9.290140e-01        1.000000\n",
      "country[T.Poland]     0.221502  2.334587  0.719108  5.831244e-01        1.000000\n",
      "country[T.Romania]    0.856818  2.658029  1.509121  1.541841e-01        1.000000\n",
      "country[T.Russia]     0.514095  1.120337  0.758920  1.650877e-01        1.000000\n",
      "country[T.Serbia]     0.793350  2.387116  1.376161  2.558686e-01        1.000000\n",
      "country[T.Thailand]   0.150188  5.971288  0.947004  9.537823e-01        1.000000\n",
      "country[T.UK]         0.500047  1.247629  0.789856  3.118220e-01        1.000000\n",
      "age_group             1.131734  1.443946  1.278344  7.779611e-05        0.021705\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running logistic regression with parameter tobacco_ever, signature DBS4\n",
      "** Warning: Covariate country, sig DBS4, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS4: 872\n",
      "All counts for signature DBS4: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.605964  1.551418  0.969476  8.422143e-01    1.000000e+00\n",
      "sex[T.Male]           0.649549  1.667829  1.035505  8.441373e-01    1.000000e+00\n",
      "country[T.Brazil]     0.303536  1.526864  0.715680  3.903744e-01    1.000000e+00\n",
      "country[T.Canada]     0.379066  1.956257  0.903695  8.031995e-01    1.000000e+00\n",
      "country[T.Japan]      0.090128  1.489515  0.458738  2.098910e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.993862  9.126831  3.176710  4.993386e-02    1.000000e+00\n",
      "country[T.Poland]     0.104032  4.368183  0.983992  8.668654e-01    1.000000e+00\n",
      "country[T.Romania]    0.156817  1.286745  0.504041  1.590885e-01    1.000000e+00\n",
      "country[T.Russia]     0.427891  1.460657  0.798890  4.589291e-01    1.000000e+00\n",
      "country[T.Serbia]     0.058474  0.915967  0.292268  3.242493e-02    1.000000e+00\n",
      "country[T.Thailand]   0.004005  4.998769  0.533547  6.308786e-01    1.000000e+00\n",
      "country[T.UK]         0.228764  1.142442  0.537991  1.065766e-01    1.000000e+00\n",
      "age_group             1.068929  1.612481  1.308855  8.824407e-03    1.000000e+00\n",
      "Intercept             0.037209  0.158745  0.078770  1.267435e-14    3.536144e-12\n",
      "Running logistic regression with parameter tobacco_ever, signature DBS9\n",
      "** Warning: Covariate country, sig DBS9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS9: 926\n",
      "All counts for signature DBS9: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.462226   1.904089  0.938001  7.834157e-01    1.000000e+00\n",
      "sex[T.Male]           0.749738   3.371281  1.547963  2.339956e-01    1.000000e+00\n",
      "country[T.Brazil]     0.326835   3.238345  1.119427  7.509353e-01    1.000000e+00\n",
      "country[T.Canada]     0.396630   4.001047  1.369066  5.909981e-01    1.000000e+00\n",
      "country[T.Japan]      0.002089   2.189339  0.271052  2.588097e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.228317  10.339700  2.204904  3.948241e-01    1.000000e+00\n",
      "country[T.Poland]     0.006551   7.346715  0.858920  7.527286e-01    1.000000e+00\n",
      "country[T.Romania]    0.001286   1.304205  0.165939  9.723122e-02    1.000000e+00\n",
      "country[T.Russia]     0.486975   2.901014  1.193206  6.497368e-01    1.000000e+00\n",
      "country[T.Serbia]     0.308079   3.928812  1.244799  6.571794e-01    1.000000e+00\n",
      "country[T.Thailand]   0.015996  21.869489  2.163723  5.901626e-01    1.000000e+00\n",
      "country[T.UK]         0.090800   1.646073  0.473951  2.387886e-01    1.000000e+00\n",
      "age_group             0.813545   1.504158  1.102133  5.019565e-01    1.000000e+00\n",
      "Intercept             0.009131   0.085815  0.029856  6.065012e-14    1.692138e-11\n",
      "Running logistic regression with parameter tobacco_ever, signature DBS78C\n",
      "** Warning: Covariate country, sig DBS78C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78C: 868\n",
      "All counts for signature DBS78C: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.741667  1.838999  1.164592  5.038722e-01    1.000000e+00\n",
      "sex[T.Male]           0.944471  2.463965  1.509243  8.530017e-02    1.000000e+00\n",
      "country[T.Brazil]     0.469791  2.311767  1.082165  8.161832e-01    1.000000e+00\n",
      "country[T.Canada]     0.917660  3.943163  1.936455  8.151451e-02    1.000000e+00\n",
      "country[T.Japan]      0.111133  1.876989  0.569543  3.794167e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.100050  4.063697  0.938064  8.629863e-01    1.000000e+00\n",
      "country[T.Poland]     0.002811  2.924819  0.364332  4.122879e-01    1.000000e+00\n",
      "country[T.Romania]    0.290054  2.082964  0.845629  7.155697e-01    1.000000e+00\n",
      "country[T.Russia]     0.481277  1.786985  0.936193  8.202576e-01    1.000000e+00\n",
      "country[T.Serbia]     0.396915  2.528706  1.070058  8.397208e-01    1.000000e+00\n",
      "country[T.Thailand]   0.007001  8.821741  0.933259  8.882020e-01    1.000000e+00\n",
      "country[T.UK]         0.679407  2.659092  1.363132  3.663194e-01    1.000000e+00\n",
      "age_group             0.975353  1.446585  1.185350  8.694535e-02    1.000000e+00\n",
      "Intercept             0.024806  0.108780  0.053308  4.389681e-19    1.224721e-16\n",
      "Running logistic regression with parameter tobacco_ever, signature DBS78D\n",
      "** Warning: Covariate country, sig DBS78D, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78D: 903\n",
      "All counts for signature DBS78D: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                           2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]    0.413415    1.558633   0.804893  4.776345e-01    1.000000e+00\n",
      "sex[T.Male]            0.420844    1.508620   0.796372  4.525378e-01    1.000000e+00\n",
      "country[T.Brazil]      0.080349    4.073035   0.793587  6.583244e-01    1.000000e+00\n",
      "country[T.Canada]      0.302592    7.428085   1.726409  4.960125e-01    1.000000e+00\n",
      "country[T.Japan]       0.184085    9.882678   1.854295  4.757839e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.374169   21.591498   3.863117  1.920244e-01    1.000000e+00\n",
      "country[T.Poland]      0.739540   45.213474   7.766369  7.338316e-02    1.000000e+00\n",
      "country[T.Romania]    17.067865  127.592144  42.809054  9.371513e-20    2.614652e-17\n",
      "country[T.Russia]      0.311856    4.343047   1.193444  6.701151e-01    1.000000e+00\n",
      "country[T.Serbia]      3.926758   35.243258  11.100082  5.282796e-06    1.473900e-03\n",
      "country[T.Thailand]    0.019338   31.128863   2.698326  5.063299e-01    1.000000e+00\n",
      "country[T.UK]          0.181128    4.372493   1.026747  6.918266e-01    1.000000e+00\n",
      "age_group              1.172996    2.079095   1.548845  1.731337e-03    4.830431e-01\n",
      "Intercept              0.002633    0.030726   0.009823  8.827160e-23    2.462778e-20\n",
      "Running logistic regression with parameter tobacco_ever, signature ID1\n",
      "Zero counts for signature ID1: 481\n",
      "All counts for signature ID1: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1311.7455 \n",
      "Link Function:         Logit             BIC:             -5220.2262\n",
      "Dependent Variable:    ID1_bool          Log-Likelihood:  -641.87   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1283.7    \n",
      "Df Model:              13                Pearson chi2:    963.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.4605   0.2122 -2.1703 0.0300 -0.8763 -0.0446\n",
      "tobacco_ever[T.Yes]  -0.3293   0.1405 -2.3440 0.0191 -0.6046 -0.0539\n",
      "sex[T.Male]           0.1691   0.1417  1.1936 0.2326 -0.1086  0.4468\n",
      "country[T.Brazil]     0.1099   0.2432  0.4518 0.6514 -0.3667  0.5864\n",
      "country[T.Canada]     0.4379   0.2753  1.5906 0.1117 -0.1017  0.9776\n",
      "country[T.Japan]      0.0259   0.3642  0.0711 0.9433 -0.6879  0.7397\n",
      "country[T.Lithuania] -0.9043   0.5605 -1.6133 0.1067 -2.0028  0.1943\n",
      "country[T.Poland]    -1.5560   0.7837 -1.9855 0.0471 -3.0921 -0.0200\n",
      "country[T.Romania]   -0.4125   0.2872 -1.4365 0.1509 -0.9753  0.1503\n",
      "country[T.Russia]     0.2438   0.1915  1.2731 0.2030 -0.1316  0.6192\n",
      "country[T.Serbia]    -0.3541   0.2833 -1.2497 0.2114 -0.9094  0.2012\n",
      "country[T.Thailand]   1.1562   1.1321  1.0213 0.3071 -1.0627  3.3750\n",
      "country[T.UK]        -0.1919   0.2285 -0.8397 0.4011 -0.6398  0.2560\n",
      "age_group             0.2641   0.0603  4.3832 0.0000  0.1460  0.3823\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "Intercept             0.416317   0.956354  0.630989  0.029984        1.000000\n",
      "tobacco_ever[T.Yes]   0.546296   0.947484  0.719448  0.019079        1.000000\n",
      "sex[T.Male]           0.897117   1.563251  1.184238  0.232629        1.000000\n",
      "country[T.Brazil]     0.692995   1.797570  1.116113  0.651436        1.000000\n",
      "country[T.Canada]     0.903317   2.657947  1.549506  0.111690        1.000000\n",
      "country[T.Japan]      0.502629   2.095352  1.026248  0.943286        1.000000\n",
      "country[T.Lithuania]  0.134958   1.214431  0.404842  0.106670        1.000000\n",
      "country[T.Poland]     0.045406   0.980210  0.210969  0.047092        1.000000\n",
      "country[T.Romania]    0.377069   1.162201  0.661990  0.150858        1.000000\n",
      "country[T.Russia]     0.876731   1.857454  1.276122  0.202990        1.000000\n",
      "country[T.Serbia]     0.402782   1.222912  0.701831  0.211413        1.000000\n",
      "country[T.Thailand]   0.345534  29.223537  3.177690  0.307125        1.000000\n",
      "country[T.UK]         0.527411   1.291742  0.825396  0.401060        1.000000\n",
      "age_group             1.157230   1.465580  1.302310  0.000012        0.003263\n",
      "Running logistic regression with parameter tobacco_ever, signature ID2\n",
      "** Warning: Covariate country, sig ID2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID2: 945\n",
      "All counts for signature ID2: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.252441   2.027974  0.724593  4.117794e-01        1.000000\n",
      "sex[T.Male]           0.374218   3.023756  1.029775  5.769665e-01        1.000000\n",
      "country[T.Brazil]     0.001731   2.056395  0.228403  1.771772e-01        1.000000\n",
      "country[T.Canada]     0.002556   3.068373  0.337809  3.941047e-01        1.000000\n",
      "country[T.Japan]      0.637901  16.462365  3.711590  1.032135e-01        1.000000\n",
      "country[T.Lithuania]  0.010753  13.726040  1.437142  5.062027e-01        1.000000\n",
      "country[T.Poland]     0.012237  16.077832  1.643945  4.643447e-01        1.000000\n",
      "country[T.Romania]    0.324377   7.820795  1.837371  3.413755e-01        1.000000\n",
      "country[T.Russia]     0.225203   3.170622  0.866209  5.396313e-01        1.000000\n",
      "country[T.Serbia]     0.002397   2.872017  0.316777  2.713847e-01        1.000000\n",
      "country[T.Thailand]   0.029905  49.227238  4.189119  3.155582e-01        1.000000\n",
      "country[T.UK]         0.347715   5.851776  1.525801  3.880195e-01        1.000000\n",
      "age_group             0.557156   1.322096  0.856259  3.623116e-01        1.000000\n",
      "Intercept             0.007881   0.130438  0.036090  5.625158e-09        0.000002\n",
      "Running logistic regression with parameter tobacco_ever, signature ID3\n",
      "** Warning: Covariate country, sig ID3, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID3: 920\n",
      "All counts for signature ID3: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.837451   3.309871  1.643232  1.420044e-01    1.000000e+00\n",
      "sex[T.Male]           0.457623   1.771572  0.890208  6.528515e-01    1.000000e+00\n",
      "country[T.Brazil]     0.246893   3.268034  1.007879  7.467918e-01    1.000000e+00\n",
      "country[T.Canada]     0.156369   3.047160  0.834773  7.957859e-01    1.000000e+00\n",
      "country[T.Japan]      0.309735   6.343414  1.684785  4.534583e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.005909   6.650686  0.775214  6.858726e-01    1.000000e+00\n",
      "country[T.Poland]     0.302147  14.969733  2.995547  2.616471e-01    1.000000e+00\n",
      "country[T.Romania]    2.329268  14.712246  5.784115  2.004928e-04    5.593749e-02\n",
      "country[T.Russia]     0.309987   2.541188  0.913985  7.303425e-01    1.000000e+00\n",
      "country[T.Serbia]     0.065364   2.778826  0.619049  5.192416e-01    1.000000e+00\n",
      "country[T.Thailand]   0.818659  56.997385  8.897890  6.297670e-02    1.000000e+00\n",
      "country[T.UK]         0.379409   3.447739  1.200193  6.304252e-01    1.000000e+00\n",
      "age_group             0.843451   1.505120  1.122191  4.035431e-01    1.000000e+00\n",
      "Intercept             0.008141   0.069129  0.025179  9.901789e-17    2.762599e-14\n",
      "Running logistic regression with parameter tobacco_ever, signature ID5\n",
      "Zero counts for signature ID5: 481\n",
      "All counts for signature ID5: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1146.5794 \n",
      "Link Function:         Logit             BIC:             -5385.3923\n",
      "Dependent Variable:    ID5_bool          Log-Likelihood:  -559.29   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1118.6    \n",
      "Df Model:              13                Pearson chi2:    964.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.4654   0.2410 -6.0807 0.0000 -1.9378 -0.9931\n",
      "tobacco_ever[T.Yes]  -0.0790   0.1531 -0.5160 0.6058 -0.3790  0.2211\n",
      "sex[T.Male]           0.5113   0.1562  3.2737 0.0011  0.2052  0.8174\n",
      "country[T.Brazil]    -0.9738   0.2698 -3.6096 0.0003 -1.5026 -0.4451\n",
      "country[T.Canada]    -0.8139   0.2948 -2.7609 0.0058 -1.3917 -0.2361\n",
      "country[T.Japan]     -2.7842   0.5000 -5.5679 0.0000 -3.7643 -1.8041\n",
      "country[T.Lithuania] -0.0867   0.5632 -0.1539 0.8777 -1.1905  1.0172\n",
      "country[T.Poland]    -0.9973   0.6647 -1.5003 0.1335 -2.3002  0.3056\n",
      "country[T.Romania]    0.3524   0.3281  1.0739 0.2828 -0.2907  0.9954\n",
      "country[T.Russia]    -0.4554   0.2063 -2.2071 0.0273 -0.8598 -0.0510\n",
      "country[T.Serbia]    -0.0649   0.3026 -0.2144 0.8303 -0.6580  0.5282\n",
      "country[T.Thailand]  -2.3628   1.1588 -2.0391 0.0414 -4.6339 -0.0917\n",
      "country[T.UK]        -0.3619   0.2480 -1.4594 0.1445 -0.8479  0.1241\n",
      "age_group             0.8113   0.0734 11.0519 0.0000  0.6674  0.9552\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.144023  0.370432  0.230977  1.196802e-09    3.339077e-07\n",
      "tobacco_ever[T.Yes]   0.684513  1.247392  0.924044  6.058474e-01    1.000000e+00\n",
      "sex[T.Male]           1.227731  2.264541  1.667407  1.061676e-03    2.962077e-01\n",
      "country[T.Brazil]     0.222549  0.640784  0.377632  3.066150e-04    8.554558e-02\n",
      "country[T.Canada]     0.248641  0.789682  0.443111  5.763978e-03    1.000000e+00\n",
      "country[T.Japan]      0.023184  0.164618  0.061778  2.578485e-08    7.193972e-06\n",
      "country[T.Lithuania]  0.304065  2.765335  0.916974  8.776876e-01    1.000000e+00\n",
      "country[T.Poland]     0.100244  1.357378  0.368875  1.335371e-01    1.000000e+00\n",
      "country[T.Romania]    0.747740  2.705839  1.422415  2.828483e-01    1.000000e+00\n",
      "country[T.Russia]     0.423255  0.950296  0.634207  2.731048e-02    1.000000e+00\n",
      "country[T.Serbia]     0.517907  1.695914  0.937190  8.302578e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009717  0.912377  0.094155  4.143976e-02    1.000000e+00\n",
      "country[T.UK]         0.428304  1.132166  0.696356  1.444598e-01    1.000000e+00\n",
      "age_group             1.949190  2.599096  2.250807  2.145371e-28    5.985584e-26\n",
      "Running logistic regression with parameter tobacco_ever, signature ID8\n",
      "Zero counts for signature ID8: 260\n",
      "All counts for signature ID8: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1055.1280 \n",
      "Link Function:         Logit             BIC:             -5476.8438\n",
      "Dependent Variable:    ID8_bool          Log-Likelihood:  -513.56   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -561.04   \n",
      "No. Observations:      961               Deviance:        1027.1    \n",
      "Df Model:              13                Pearson chi2:    956.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept             0.0689   0.2399  0.2872 0.7739 -0.4013  0.5391\n",
      "tobacco_ever[T.Yes]  -0.0028   0.1613 -0.0176 0.9860 -0.3189  0.3133\n",
      "sex[T.Male]           0.4162   0.1614  2.5777 0.0099  0.0997  0.7326\n",
      "country[T.Brazil]    -0.7763   0.2738 -2.8354 0.0046 -1.3130 -0.2397\n",
      "country[T.Canada]    -0.2465   0.3312 -0.7443 0.4567 -0.8956  0.4026\n",
      "country[T.Japan]     -1.5096   0.3958 -3.8145 0.0001 -2.2853 -0.7340\n",
      "country[T.Lithuania]  0.4631   0.7839  0.5908 0.5547 -1.0733  1.9996\n",
      "country[T.Poland]    -0.2932   0.6458 -0.4540 0.6498 -1.5589  0.9725\n",
      "country[T.Romania]   -0.6716   0.3264 -2.0574 0.0396 -1.3113 -0.0318\n",
      "country[T.Russia]    -0.1172   0.2280 -0.5140 0.6072 -0.5642  0.3297\n",
      "country[T.Serbia]    -0.7006   0.3075 -2.2788 0.0227 -1.3033 -0.0980\n",
      "country[T.Thailand]  -1.2885   0.9449 -1.3636 0.1727 -3.1406  0.5635\n",
      "country[T.UK]        -0.2436   0.2818 -0.8644 0.3874 -0.7960  0.3088\n",
      "age_group             0.5556   0.0716  7.7645 0.0000  0.4153  0.6958\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.669452  1.714475  1.071335  7.739414e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.726928  1.367866  0.997166  9.859598e-01    1.000000e+00\n",
      "sex[T.Male]           1.104877  2.080432  1.516121  9.945063e-03    1.000000e+00\n",
      "country[T.Brazil]     0.269022  0.786869  0.460093  4.576735e-03    1.000000e+00\n",
      "country[T.Canada]     0.408360  1.495734  0.781536  4.567074e-01    1.000000e+00\n",
      "country[T.Japan]      0.101744  0.480008  0.220994  1.364481e-04    3.806901e-02\n",
      "country[T.Lithuania]  0.341862  7.386022  1.589025  5.546745e-01    1.000000e+00\n",
      "country[T.Poland]     0.210375  2.644453  0.745873  6.498000e-01    1.000000e+00\n",
      "country[T.Romania]    0.269459  0.968702  0.510906  3.964978e-02    1.000000e+00\n",
      "country[T.Russia]     0.568825  1.390615  0.889391  6.072450e-01    1.000000e+00\n",
      "country[T.Serbia]     0.271646  0.906629  0.496268  2.268052e-02    1.000000e+00\n",
      "country[T.Thailand]   0.043258  1.756766  0.275672  1.726786e-01    1.000000e+00\n",
      "country[T.UK]         0.451151  1.361729  0.783802  3.873779e-01    1.000000e+00\n",
      "age_group             1.514874  2.005347  1.742943  8.199492e-15    2.287658e-12\n",
      "Running logistic regression with parameter tobacco_ever, signature ID9\n",
      "** Warning: Covariate country, sig ID9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID9: 957\n",
      "All counts for signature ID9: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.308024   23.088732   2.059376  4.101646e-01        1.000000\n",
      "sex[T.Male]           0.185678   12.336978   1.196410  6.237919e-01        1.000000\n",
      "country[T.Brazil]     0.004189    7.302258   0.581571  5.202364e-01        1.000000\n",
      "country[T.Canada]     0.160117   14.016129   1.796904  5.782211e-01        1.000000\n",
      "country[T.Japan]      0.007389   14.299791   1.055337  5.607305e-01        1.000000\n",
      "country[T.Lithuania]  0.022404   42.729347   3.174849  3.594637e-01        1.000000\n",
      "country[T.Poland]     0.025651   50.730402   3.667075  3.208362e-01        1.000000\n",
      "country[T.Romania]    0.005970   10.370154   0.828074  6.200057e-01        1.000000\n",
      "country[T.Russia]     0.002148    4.112552   0.304443  3.378121e-01        1.000000\n",
      "country[T.Serbia]     0.233843   21.847897   2.686228  3.125670e-01        1.000000\n",
      "country[T.Thailand]   0.066857  173.327968  10.085983  2.020652e-01        1.000000\n",
      "country[T.UK]         0.002788    4.949229   0.389226  3.394859e-01        1.000000\n",
      "age_group             0.552409    3.278848   1.264666  4.924687e-01        1.000000\n",
      "Intercept             0.000089    0.053370   0.003927  3.322928e-08        0.000009\n",
      "Running logistic regression with parameter tobacco_ever, signature ID11\n",
      "** Warning: Covariate country, sig ID11, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID11: 953\n",
      "All counts for signature ID11: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.023986   1.213531  0.235698  7.861673e-02         1.00000\n",
      "sex[T.Male]           0.137026   2.422243  0.613014  4.442867e-01         1.00000\n",
      "country[T.Brazil]     0.002927   4.255458  0.397376  3.877971e-01         1.00000\n",
      "country[T.Canada]     0.203576  14.197568  2.181766  4.316319e-01         1.00000\n",
      "country[T.Japan]      0.009958  15.222180  1.365803  5.568359e-01         1.00000\n",
      "country[T.Lithuania]  0.012295  19.378197  1.699896  4.784773e-01         1.00000\n",
      "country[T.Poland]     0.033881  65.409000  4.839885  3.272529e-01         1.00000\n",
      "country[T.Romania]    0.003838   5.501099  0.519624  4.851227e-01         1.00000\n",
      "country[T.Russia]     0.218359   5.569579  1.097867  6.165879e-01         1.00000\n",
      "country[T.Serbia]     0.003971   5.730619  0.538336  4.818825e-01         1.00000\n",
      "country[T.Thailand]   0.021493  43.116130  3.111328  4.276408e-01         1.00000\n",
      "country[T.UK]         0.116350   7.738644  1.226265  5.086549e-01         1.00000\n",
      "age_group             0.679254   2.379662  1.234258  4.098547e-01         1.00000\n",
      "Intercept             0.001768   0.114194  0.018709  2.138710e-07         0.00006\n",
      "Running logistic regression with parameter tobacco_ever, signature ID12\n",
      "** Warning: Covariate country, sig ID12, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID12: 954\n",
      "All counts for signature ID12: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR   p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.117015      2.788629   0.653089  0.434566        1.000000\n",
      "sex[T.Male]           0.128605      2.395574   0.576850  0.315687        1.000000\n",
      "country[T.Brazil]     1.333380   1956.675846  14.393065  0.020646        1.000000\n",
      "country[T.Canada]     0.022074    780.105632   4.148480  0.473966        1.000000\n",
      "country[T.Japan]      0.051370   1823.303871   9.672546  0.199076        1.000000\n",
      "country[T.Lithuania]  0.106766   3905.391477  20.388273  0.137489        1.000000\n",
      "country[T.Poland]     0.065907   2454.444739  12.738588  0.123432        1.000000\n",
      "country[T.Romania]    0.020815    725.687006   3.887272  0.306094        1.000000\n",
      "country[T.Russia]     0.529068    756.928969   5.593369  0.131433        1.000000\n",
      "country[T.Serbia]     0.014123    492.815566   2.638875  0.328453        1.000000\n",
      "country[T.Thailand]   0.322065  14731.612912  68.472752  0.059934        1.000000\n",
      "country[T.UK]         0.426776   1197.409658   8.142941  0.124809        1.000000\n",
      "age_group             0.205483      0.910157   0.461979  0.020342        1.000000\n",
      "Intercept             0.000091      0.124469   0.012695  0.000001        0.000389\n",
      "Running logistic regression with parameter tobacco_ever, signature ID83C\n",
      "** Warning: Covariate country, sig ID83C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID83C: 944\n",
      "All counts for signature ID83C: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                           2.5%         97.5%          OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]    0.238367      2.285891    0.754108  5.305444e-01    1.000000e+00\n",
      "sex[T.Male]            0.386814      3.584608    1.145584  6.689836e-01    1.000000e+00\n",
      "country[T.Brazil]      0.016332    560.359192    3.025082  4.299347e-01    1.000000e+00\n",
      "country[T.Canada]      0.579863   1630.616888   11.084654  1.061651e-01    1.000000e+00\n",
      "country[T.Japan]       0.033745   1185.169352    6.324505  3.064959e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.076864   2723.726834   14.469838  1.769361e-01    1.000000e+00\n",
      "country[T.Poland]      0.137312   4949.091905   26.048856  1.371729e-01    1.000000e+00\n",
      "country[T.Romania]    16.640395  16612.852435  129.032687  1.938560e-10    5.408582e-08\n",
      "country[T.Russia]      0.007819    271.118071    1.455904  6.152955e-01    1.000000e+00\n",
      "country[T.Serbia]      3.036852   4362.863473   32.198499  2.345818e-03    6.544834e-01\n",
      "country[T.Thailand]    0.176828   6974.318483   35.105375  1.167205e-01    1.000000e+00\n",
      "country[T.UK]          0.012284    422.343117    2.277772  4.558323e-01    1.000000e+00\n",
      "age_group              0.923130      2.470521    1.472286  9.589237e-02    1.000000e+00\n",
      "Intercept              0.000006      0.009959    0.000872  4.248534e-18    1.185341e-15\n",
      "Running logistic regression with parameter tobacco_ever, signature SBS_burden\n",
      "** Warning: Covariate country, sig SBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS_burden: 481\n",
      "All counts for signature SBS_burden: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.842714   1.575609  1.151715  3.729825e-01    1.000000e+00\n",
      "sex[T.Male]           1.424533   2.708518  1.959239  3.162641e-05    8.823767e-03\n",
      "country[T.Brazil]     0.321739   0.952125  0.555417  3.227918e-02    1.000000e+00\n",
      "country[T.Canada]     0.176452   0.596007  0.326817  2.439610e-04    6.806511e-02\n",
      "country[T.Japan]      0.187811   0.972171  0.426904  4.254700e-02    1.000000e+00\n",
      "country[T.Lithuania]  0.285147   2.541897  0.838477  7.285711e-01    1.000000e+00\n",
      "country[T.Poland]     0.193742   2.584661  0.732695  6.266692e-01    1.000000e+00\n",
      "country[T.Romania]    3.496526  19.547025  7.842931  7.973533e-08    2.224616e-05\n",
      "country[T.Russia]     0.487470   1.127575  0.741804  1.619118e-01    1.000000e+00\n",
      "country[T.Serbia]     0.820998   2.823028  1.515669  1.840879e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000263   0.363622  0.035843  2.436465e-03    6.797738e-01\n",
      "country[T.UK]         0.459913   1.252598  0.759114  2.750207e-01    1.000000e+00\n",
      "age_group             2.394166   3.292596  2.796220  7.270892e-49    2.028579e-46\n",
      "Intercept             0.056066   0.158684  0.095329  9.447918e-22    2.635969e-19\n",
      "Running logistic regression with parameter tobacco_ever, signature DBS_burden\n",
      "** Warning: Covariate country, sig DBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS_burden: 530\n",
      "All counts for signature DBS_burden: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "tobacco_ever[T.Yes]   0.801407  1.490542  1.092489  5.729856e-01    1.000000e+00\n",
      "sex[T.Male]           1.161888  2.185593  1.590682  3.713932e-03    1.000000e+00\n",
      "country[T.Brazil]     0.379947  1.069320  0.639249  8.809126e-02    1.000000e+00\n",
      "country[T.Canada]     0.230532  0.736796  0.414998  2.589187e-03    7.223833e-01\n",
      "country[T.Japan]      0.091265  0.468360  0.211999  1.000252e-04    2.790703e-02\n",
      "country[T.Lithuania]  0.539177  5.032744  1.558902  4.156347e-01    1.000000e+00\n",
      "country[T.Poland]     0.198378  2.262121  0.694182  5.454209e-01    1.000000e+00\n",
      "country[T.Romania]    1.810707  8.689513  3.819218  2.979287e-04    8.312212e-02\n",
      "country[T.Russia]     0.502772  1.159735  0.764028  2.060480e-01    1.000000e+00\n",
      "country[T.Serbia]     0.747300  2.418484  1.339524  3.267308e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000354  0.454511  0.047439  4.859799e-03    1.000000e+00\n",
      "country[T.UK]         0.622362  1.825153  1.061828  7.975074e-01    1.000000e+00\n",
      "age_group             1.784440  2.384883  2.057017  3.323702e-26    9.273128e-24\n",
      "Intercept             0.123892  0.325875  0.202480  1.402748e-11    3.913667e-09\n",
      "Running logistic regression with parameter tobacco_ever, signature ID_burden\n",
      "Zero counts for signature ID_burden: 482\n",
      "All counts for signature ID_burden: 961\n",
      "Covariates used: ['sex', 'age_group', 'country']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1112.6005 \n",
      "Link Function:         Logit             BIC:             -5419.3712\n",
      "Dependent Variable:    ID_burden_bool    Log-Likelihood:  -542.30   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -666.11   \n",
      "No. Observations:      961               Deviance:        1084.6    \n",
      "Df Model:              13                Pearson chi2:    959.      \n",
      "Df Residuals:          947               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.7446   0.2496 -6.9894 0.0000 -2.2338 -1.2554\n",
      "tobacco_ever[T.Yes]  -0.1641   0.1560 -1.0517 0.2929 -0.4698  0.1417\n",
      "sex[T.Male]           0.6143   0.1602  3.8349 0.0001  0.3003  0.9282\n",
      "country[T.Brazil]    -0.8240   0.2725 -3.0236 0.0025 -1.3582 -0.2899\n",
      "country[T.Canada]    -1.0580   0.3041 -3.4795 0.0005 -1.6539 -0.4620\n",
      "country[T.Japan]     -2.6428   0.4840 -5.4600 0.0000 -3.5915 -1.6941\n",
      "country[T.Lithuania] -0.6290   0.5554 -1.1324 0.2575 -1.7176  0.4596\n",
      "country[T.Poland]    -0.9165   0.6760 -1.3558 0.1752 -2.2415  0.4084\n",
      "country[T.Romania]    0.8116   0.3529  2.3000 0.0214  0.1200  1.5033\n",
      "country[T.Russia]    -0.3323   0.2098 -1.5839 0.1132 -0.7435  0.0789\n",
      "country[T.Serbia]    -0.1166   0.3088 -0.3776 0.7057 -0.7218  0.4886\n",
      "country[T.Thailand]  -2.3970   1.1676 -2.0529 0.0401 -4.6856 -0.1085\n",
      "country[T.UK]        -0.2728   0.2526 -1.0801 0.2801 -0.7679  0.2223\n",
      "age_group             0.9136   0.0770 11.8696 0.0000  0.7627  1.0644\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.107118  0.284966  0.174715  2.760020e-12    7.700455e-10\n",
      "tobacco_ever[T.Yes]   0.625135  1.152204  0.848695  2.929305e-01    1.000000e+00\n",
      "sex[T.Male]           1.350310  2.530076  1.848347  1.256255e-04    3.504950e-02\n",
      "country[T.Brazil]     0.257129  0.748353  0.438661  2.497618e-03    6.968353e-01\n",
      "country[T.Canada]     0.191296  0.630008  0.347157  5.024391e-04    1.401805e-01\n",
      "country[T.Japan]      0.027557  0.183759  0.071161  4.761125e-08    1.328354e-05\n",
      "country[T.Lithuania]  0.179494  1.583516  0.533134  2.574595e-01    1.000000e+00\n",
      "country[T.Poland]     0.106297  1.504476  0.399901  1.751699e-01    1.000000e+00\n",
      "country[T.Romania]    1.127479  4.496293  2.251549  2.144958e-02    1.000000e+00\n",
      "country[T.Russia]     0.475434  1.082103  0.717265  1.132222e-01    1.000000e+00\n",
      "country[T.Serbia]     0.485887  1.630006  0.889943  7.057153e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009227  0.897172  0.090986  4.008294e-02    1.000000e+00\n",
      "country[T.UK]         0.463986  1.248885  0.761226  2.801020e-01    1.000000e+00\n",
      "age_group             2.144108  2.899188  2.493225  1.701868e-32    4.748212e-30\n",
      "Using below/above median model for signature SBS1, its frequency is 0.77\n",
      "Using below/above median model for signature SBS1536A, its frequency is 0.86\n",
      "Using below/above median model for signature SBS1536B, its frequency is 0.90\n",
      "Using below/above median model for signature ID1, its frequency is 0.86\n",
      "Using below/above median model for signature ID5, its frequency is 0.93\n",
      "Running logistic regression with parameter bmi_q, signature SBS1\n",
      "Zero counts for signature SBS1: 480\n",
      "All counts for signature SBS1: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1262.1407 \n",
      "Link Function:         Logit             BIC:             -5257.1112\n",
      "Dependent Variable:    SBS1_bool         Log-Likelihood:  -616.07   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -665.42   \n",
      "No. Observations:      960               Deviance:        1232.1    \n",
      "Df Model:              14                Pearson chi2:    958.      \n",
      "Df Residuals:          945               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.9245   0.2482 -3.7243 0.0002 -1.4111 -0.4380\n",
      "sex[T.Male]          -0.0340   0.1454 -0.2339 0.8151 -0.3190  0.2510\n",
      "country[T.Brazil]     0.1349   0.2483  0.5433 0.5869 -0.3518  0.6216\n",
      "country[T.Canada]     0.7653   0.2872  2.6642 0.0077  0.2023  1.3282\n",
      "country[T.Japan]      0.2586   0.3777  0.6847 0.4936 -0.4816  0.9988\n",
      "country[T.Lithuania]  0.4975   0.5450  0.9128 0.3613 -0.5707  1.5656\n",
      "country[T.Poland]    -0.5370   0.6303 -0.8520 0.3942 -1.7723  0.6984\n",
      "country[T.Romania]   -1.0175   0.3182 -3.1976 0.0014 -1.6411 -0.3938\n",
      "country[T.Russia]     0.5001   0.1954  2.5588 0.0105  0.1171  0.8832\n",
      "country[T.Serbia]    -0.7645   0.3089 -2.4749 0.0133 -1.3699 -0.1591\n",
      "country[T.Thailand]   0.1556   0.9377  0.1659 0.8682 -1.6823  1.9936\n",
      "country[T.UK]         0.4123   0.2344  1.7585 0.0787 -0.0472  0.8717\n",
      "tobacco_ever[T.Yes]  -0.0019   0.1454 -0.0132 0.9895 -0.2868  0.2830\n",
      "bmi_q                -0.0301   0.0662 -0.4539 0.6499 -0.1598  0.0997\n",
      "age_group             0.4397   0.0638  6.8942 0.0000  0.3147  0.5647\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.243883  0.645341  0.396721  1.958728e-04    5.464853e-02\n",
      "sex[T.Male]           0.726876  1.285291  0.966565  8.150821e-01    1.000000e+00\n",
      "country[T.Brazil]     0.703410  1.861987  1.144439  5.869414e-01    1.000000e+00\n",
      "country[T.Canada]     1.224199  3.774397  2.149561  7.716935e-03    1.000000e+00\n",
      "country[T.Japan]      0.617776  2.714944  1.295078  4.935548e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.565154  4.785472  1.644545  3.613325e-01    1.000000e+00\n",
      "country[T.Poland]     0.169941  2.010447  0.584514  3.942372e-01    1.000000e+00\n",
      "country[T.Romania]    0.193766  0.674490  0.361515  1.385959e-03    3.866825e-01\n",
      "country[T.Russia]     1.124176  2.418622  1.648926  1.050211e-02    1.000000e+00\n",
      "country[T.Serbia]     0.254122  0.852936  0.465564  1.332662e-02    1.000000e+00\n",
      "country[T.Thailand]   0.185938  7.341682  1.168374  8.682017e-01    1.000000e+00\n",
      "country[T.UK]         0.953868  2.391087  1.510225  7.866224e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.750643  1.327096  0.998086  9.894823e-01    1.000000e+00\n",
      "bmi_q                 0.852312  1.104840  0.970396  6.498787e-01    1.000000e+00\n",
      "age_group             1.369848  1.758930  1.552245  5.415315e-12    1.510873e-09\n",
      "Running logistic regression with parameter bmi_q, signature SBS2\n",
      "** Warning: Covariate country, sig SBS2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS2: 954\n",
      "All counts for signature SBS2: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%       97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]           0.262741    7.300310  1.246853  0.526338        1.000000\n",
      "country[T.Brazil]     0.004287    7.440374  0.594470  0.453358        1.000000\n",
      "country[T.Canada]     0.005675   10.362268  0.796239  0.772876        1.000000\n",
      "country[T.Japan]      0.007693   15.095931  1.097453  0.532076        1.000000\n",
      "country[T.Lithuania]  0.026558   54.872486  3.820928  0.330640        1.000000\n",
      "country[T.Poland]     0.024039   48.782878  3.466927  0.349116        1.000000\n",
      "country[T.Romania]    0.005955   10.203662  0.823136  0.466987        1.000000\n",
      "country[T.Russia]     0.176919    8.331403  1.211537  0.518045        1.000000\n",
      "country[T.Serbia]     0.004825    8.898731  0.679399  0.506632        1.000000\n",
      "country[T.Thailand]   0.054562  134.061533  8.199069  0.218875        1.000000\n",
      "country[T.UK]         0.362654   16.027779  2.411740  0.233339        1.000000\n",
      "tobacco_ever[T.Yes]   0.205856    5.142895  1.015441  0.496238        1.000000\n",
      "bmi_q                 0.368937    1.562142  0.760179  0.335520        1.000000\n",
      "age_group             0.486398    2.020611  0.976525  0.575684        1.000000\n",
      "Intercept             0.000861    0.159088  0.015940  0.000088        0.024532\n",
      "Running logistic regression with parameter bmi_q, signature SBS4\n",
      "Zero counts for signature SBS4: 409\n",
      "All counts for signature SBS4: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1253.4303 \n",
      "Link Function:         Logit             BIC:             -5265.8217\n",
      "Dependent Variable:    SBS4_bool         Log-Likelihood:  -611.72   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -654.88   \n",
      "No. Observations:      960               Deviance:        1223.4    \n",
      "Df Model:              14                Pearson chi2:    955.      \n",
      "Df Residuals:          945               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.3149   0.2464 -1.2783 0.2012 -0.7978  0.1680\n",
      "sex[T.Male]          -0.1979   0.1460 -1.3553 0.1753 -0.4840  0.0883\n",
      "country[T.Brazil]    -0.6387   0.2521 -2.5338 0.0113 -1.1327 -0.1446\n",
      "country[T.Canada]    -0.5686   0.2792 -2.0364 0.0417 -1.1159 -0.0214\n",
      "country[T.Japan]      0.8362   0.4769  1.7536 0.0795 -0.0984  1.7708\n",
      "country[T.Lithuania] -0.1983   0.5452 -0.3638 0.7160 -1.2669  0.8702\n",
      "country[T.Poland]     0.7261   0.6918  1.0495 0.2939 -0.6299  2.0821\n",
      "country[T.Romania]   -0.9104   0.2927 -3.1103 0.0019 -1.4841 -0.3367\n",
      "country[T.Russia]    -0.3578   0.1958 -1.8272 0.0677 -0.7416  0.0260\n",
      "country[T.Serbia]    -0.5414   0.2848 -1.9012 0.0573 -1.0996  0.0167\n",
      "country[T.Thailand]  -0.3421   0.9311 -0.3675 0.7133 -2.1671  1.4828\n",
      "country[T.UK]        -0.0570   0.2440 -0.2337 0.8152 -0.5352  0.4212\n",
      "tobacco_ever[T.Yes]   0.6656   0.1463  4.5505 0.0000  0.3789  0.9523\n",
      "bmi_q                 0.0092   0.0662  0.1383 0.8900 -0.1207  0.1390\n",
      "age_group             0.3437   0.0630  5.4560 0.0000  0.2203  0.4672\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.450310  1.182884  0.729839  2.011584e-01        1.000000\n",
      "sex[T.Male]           0.616305  1.092286  0.820476  1.753112e-01        1.000000\n",
      "country[T.Brazil]     0.322160  0.865325  0.527990  1.128220e-02        1.000000\n",
      "country[T.Canada]     0.327622  0.978874  0.566304  4.170668e-02        1.000000\n",
      "country[T.Japan]      0.906266  5.875814  2.307608  7.950437e-02        1.000000\n",
      "country[T.Lithuania]  0.281701  2.387503  0.820099  7.160272e-01        1.000000\n",
      "country[T.Poland]     0.532648  8.021115  2.066986  2.939459e-01        1.000000\n",
      "country[T.Romania]    0.226707  0.714108  0.402360  1.868660e-03        0.521356\n",
      "country[T.Russia]     0.476347  1.026338  0.699209  6.766886e-02        1.000000\n",
      "country[T.Serbia]     0.333015  1.016889  0.581927  5.728195e-02        1.000000\n",
      "country[T.Thailand]   0.114513  4.405156  0.710245  7.132732e-01        1.000000\n",
      "country[T.UK]         0.585547  1.523759  0.944581  8.152320e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.460721  2.591689  1.945696  5.351109e-06        0.001493\n",
      "bmi_q                 0.886323  1.149121  1.009204  8.899959e-01        1.000000\n",
      "age_group             1.246391  1.595531  1.410197  4.868770e-08        0.000014\n",
      "Running logistic regression with parameter bmi_q, signature SBS5\n",
      "Zero counts for signature SBS5: 883\n",
      "All counts for signature SBS5: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             510.7864  \n",
      "Link Function:         Logit             BIC:             -6008.4656\n",
      "Dependent Variable:    SBS5_bool         Log-Likelihood:  -240.39   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -268.11   \n",
      "No. Observations:      960               Deviance:        480.79    \n",
      "Df Model:              14                Pearson chi2:    948.      \n",
      "Df Residuals:          945               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -3.9822   0.5201 -7.6563 0.0000 -5.0016 -2.9628\n",
      "sex[T.Male]          -0.1188   0.2683 -0.4428 0.6579 -0.6446  0.4070\n",
      "country[T.Brazil]     0.1848   0.3813  0.4846 0.6280 -0.5626  0.9321\n",
      "country[T.Canada]    -0.7224   0.5177 -1.3954 0.1629 -1.7372  0.2923\n",
      "country[T.Japan]      0.3077   0.5196  0.5922 0.5537 -0.7107  1.3260\n",
      "country[T.Lithuania]  0.4309   0.6920  0.6227 0.5335 -0.9254  1.7871\n",
      "country[T.Poland]    -0.1913   1.0894 -0.1756 0.8606 -2.3265  1.9439\n",
      "country[T.Romania]   -1.4137   0.7526 -1.8784 0.0603 -2.8888  0.0614\n",
      "country[T.Russia]    -1.1321   0.4696 -2.4107 0.0159 -2.0525 -0.2117\n",
      "country[T.Serbia]    -0.4087   0.5650 -0.7233 0.4695 -1.5161  0.6987\n",
      "country[T.Thailand]   0.5203   1.1577  0.4495 0.6531 -1.7487  2.7893\n",
      "country[T.UK]        -0.7084   0.4279 -1.6556 0.0978 -1.5470  0.1302\n",
      "tobacco_ever[T.Yes]   0.3828   0.2717  1.4090 0.1588 -0.1497  0.9153\n",
      "bmi_q                 0.1488   0.1266  1.1756 0.2397 -0.0993  0.3969\n",
      "age_group             0.6430   0.1275  5.0418 0.0000  0.3930  0.8930\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.006727   0.051675  0.018645  1.913152e-14    5.337694e-12\n",
      "sex[T.Male]           0.524864   1.502348  0.887991  6.579155e-01    1.000000e+00\n",
      "country[T.Brazil]     0.569749   2.539861  1.202947  6.279662e-01    1.000000e+00\n",
      "country[T.Canada]     0.176017   1.339512  0.485568  1.629018e-01    1.000000e+00\n",
      "country[T.Japan]      0.491309   3.766008  1.360248  5.537494e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.396379   5.972367  1.538609  5.335005e-01    1.000000e+00\n",
      "country[T.Poland]     0.097633   6.985952  0.825869  8.605956e-01    1.000000e+00\n",
      "country[T.Romania]    0.055644   1.063305  0.243243  6.032582e-02    1.000000e+00\n",
      "country[T.Russia]     0.128408   0.809236  0.322355  1.592283e-02    1.000000e+00\n",
      "country[T.Serbia]     0.219569   2.011159  0.664521  4.694774e-01    1.000000e+00\n",
      "country[T.Thailand]   0.174001  16.270329  1.682576  6.531039e-01    1.000000e+00\n",
      "country[T.UK]         0.212893   1.139063  0.492441  9.779424e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.860971   2.497426  1.466360  1.588463e-01    1.000000e+00\n",
      "bmi_q                 0.905489   1.487218  1.160457  2.397380e-01    1.000000e+00\n",
      "age_group             1.481475   2.442360  1.902182  4.612271e-07    1.286824e-04\n",
      "Running logistic regression with parameter bmi_q, signature SBS12\n",
      "** Warning: Covariate country, sig SBS12, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS12: 915\n",
      "All counts for signature SBS12: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.478219    2.661842    1.111867  7.445825e-01    1.000000e+00\n",
      "country[T.Brazil]      0.517965   10.055244    2.376404  2.363750e-01    1.000000e+00\n",
      "country[T.Canada]      1.376550   20.171550    5.131171  1.562579e-02    1.000000e+00\n",
      "country[T.Japan]      40.724961  486.741071  127.003526  2.958839e-22    8.255160e-20\n",
      "country[T.Lithuania]   0.013037   18.443977    1.771916  6.150470e-01    1.000000e+00\n",
      "country[T.Poland]      0.018581   26.128578    2.520386  5.296073e-01    1.000000e+00\n",
      "country[T.Romania]     0.382096   10.330693    2.233742  3.192066e-01    1.000000e+00\n",
      "country[T.Russia]      0.125012    3.392881    0.731823  6.364083e-01    1.000000e+00\n",
      "country[T.Serbia]      0.127976    7.284892    1.301995  6.992323e-01    1.000000e+00\n",
      "country[T.Thailand]    0.028154   47.746674    3.966740  4.166845e-01    1.000000e+00\n",
      "country[T.UK]          0.227385    6.138011    1.328035  6.568432e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.317060    1.699680    0.742266  4.473899e-01    1.000000e+00\n",
      "bmi_q                  0.606394    1.313414    0.886294  5.161819e-01    1.000000e+00\n",
      "age_group              0.882415    1.729748    1.227646  2.190933e-01    1.000000e+00\n",
      "Intercept              0.003174    0.060016    0.015286  9.759875e-12    2.723005e-09\n",
      "Running logistic regression with parameter bmi_q, signature SBS13\n",
      "** Warning: Covariate country, sig SBS13, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS13: 806\n",
      "All counts for signature SBS13: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.145233  2.506117  1.683418  7.725328e-03    1.000000e+00\n",
      "country[T.Brazil]     0.429581  1.639199  0.862131  6.415426e-01    1.000000e+00\n",
      "country[T.Canada]     0.415078  1.760363  0.883633  7.315085e-01    1.000000e+00\n",
      "country[T.Japan]      0.104667  1.143124  0.402592  9.090382e-02    1.000000e+00\n",
      "country[T.Lithuania]  0.373616  4.750488  1.511570  5.145421e-01    1.000000e+00\n",
      "country[T.Poland]     0.001357  1.371473  0.175086  1.130535e-01    1.000000e+00\n",
      "country[T.Romania]    0.040357  0.616184  0.200136  2.836700e-03    7.914392e-01\n",
      "country[T.Russia]     0.943152  2.424670  1.509973  8.540813e-02    1.000000e+00\n",
      "country[T.Serbia]     0.294120  1.474747  0.693438  3.510666e-01    1.000000e+00\n",
      "country[T.Thailand]   0.003852  4.844665  0.513611  6.143956e-01    1.000000e+00\n",
      "country[T.UK]         0.691449  2.136633  1.227132  4.699970e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.861366  1.810833  1.246944  2.385109e-01    1.000000e+00\n",
      "bmi_q                 0.746333  1.043126  0.881900  1.402929e-01    1.000000e+00\n",
      "age_group             0.898317  1.238930  1.054094  5.140426e-01    1.000000e+00\n",
      "Intercept             0.074296  0.273194  0.144513  6.522738e-10    1.819844e-07\n",
      "Running logistic regression with parameter bmi_q, signature SBS18\n",
      "** Warning: Covariate country, sig SBS18, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS18: 889\n",
      "All counts for signature SBS18: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.050264   3.142909  1.786822  3.117445e-02        1.000000\n",
      "country[T.Brazil]     0.404842   2.174422  0.980015  7.967147e-01        1.000000\n",
      "country[T.Canada]     0.214805   1.858452  0.703503  4.981187e-01        1.000000\n",
      "country[T.Japan]      0.138605   2.411281  0.717040  5.896932e-01        1.000000\n",
      "country[T.Lithuania]  0.402335   7.931086  2.165942  3.060852e-01        1.000000\n",
      "country[T.Poland]     0.114986   5.002462  1.099881  8.132236e-01        1.000000\n",
      "country[T.Romania]    0.331771   2.417042  0.972111  7.921302e-01        1.000000\n",
      "country[T.Russia]     0.295449   1.233990  0.615012  1.674298e-01        1.000000\n",
      "country[T.Serbia]     0.270520   1.998841  0.797831  6.109489e-01        1.000000\n",
      "country[T.Thailand]   0.008995  11.570017  1.203252  7.800899e-01        1.000000\n",
      "country[T.UK]         0.359027   1.899794  0.862977  6.613987e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.464792   1.275435  0.771617  2.983242e-01        1.000000\n",
      "bmi_q                 0.715126   1.147331  0.903877  3.838351e-01        1.000000\n",
      "age_group             0.662080   1.019079  0.821799  7.190962e-02        1.000000\n",
      "Intercept             0.057105   0.305225  0.135718  4.804660e-07        0.000134\n",
      "Running logistic regression with parameter bmi_q, signature SBS21\n",
      "** Warning: Covariate country, sig SBS21, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter bmi_q, sig SBS21, perfect or near-perfect separation for category 1. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS21: 955\n",
      "All counts for signature SBS21: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR   p-value  p-value (corr)\n",
      "sex[T.Male]           0.179457      5.680911   0.946107  0.583153         1.00000\n",
      "country[T.Brazil]     0.014395    496.303096   2.673642  0.374296         1.00000\n",
      "country[T.Canada]     0.019223    673.066405   3.597027  0.517624         1.00000\n",
      "country[T.Japan]      0.031284   1164.827848   6.032136  0.289384         1.00000\n",
      "country[T.Lithuania]  0.098911   3674.946495  19.034529  0.152379         1.00000\n",
      "country[T.Poland]     0.068052   2526.923155  13.140340  0.192824         1.00000\n",
      "country[T.Romania]    1.670971   2925.574753  21.057138  0.011619         1.00000\n",
      "country[T.Russia]     0.159774    457.947875   3.095048  0.320862         1.00000\n",
      "country[T.Serbia]     0.015564    552.863197   2.934846  0.402745         1.00000\n",
      "country[T.Thailand]   0.266727  11148.408823  54.299139  0.085869         1.00000\n",
      "country[T.UK]         0.956165   1681.113267  12.088629  0.037004         1.00000\n",
      "tobacco_ever[T.Yes]   0.265464      8.344491   1.378335  0.471315         1.00000\n",
      "bmi_q                 0.381400      1.773528   0.814550  0.410803         1.00000\n",
      "age_group             0.322441      1.479603   0.702231  0.276619         1.00000\n",
      "Intercept             0.000033      0.090261   0.005569  0.000016         0.00436\n",
      "Running logistic regression with parameter bmi_q, signature SBS22\n",
      "** Warning: Covariate country, sig SBS22, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS22: 890\n",
      "All counts for signature SBS22: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                            2.5%        97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]             0.286089     1.250924    0.602942  1.634103e-01    1.000000e+00\n",
      "country[T.Brazil]       1.149597    76.658094    7.255611  3.259612e-02    1.000000e+00\n",
      "country[T.Canada]       0.837463    73.767168    6.549318  7.057358e-02    1.000000e+00\n",
      "country[T.Japan]        0.014619    42.828373    2.175553  5.178690e-01    1.000000e+00\n",
      "country[T.Lithuania]    0.027079    82.863540    4.082294  3.634187e-01    1.000000e+00\n",
      "country[T.Poland]       0.061956   194.995002    9.412610  2.244515e-01    1.000000e+00\n",
      "country[T.Romania]    122.627482  4948.608708  523.614884  4.640942e-39    1.294823e-36\n",
      "country[T.Russia]       0.003748    10.512399    0.550836  6.123292e-01    1.000000e+00\n",
      "country[T.Serbia]      17.892831   727.681172   77.298549  1.225795e-12    3.419969e-10\n",
      "country[T.Thailand]    18.054924  2466.609313  165.565685  9.778659e-06    2.728246e-03\n",
      "country[T.UK]           0.175900    27.722915    2.208180  4.309081e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]     0.446458     2.044561    0.950846  6.971915e-01    1.000000e+00\n",
      "bmi_q                   0.719799     1.394543    1.000851  7.829794e-01    1.000000e+00\n",
      "age_group               1.402868     2.773167    1.940771  3.279572e-05    9.150006e-03\n",
      "Intercept               0.000136     0.008840    0.001530  7.094918e-22    1.979482e-19\n",
      "Running logistic regression with parameter bmi_q, signature SBS44\n",
      "** Warning: Covariate country, sig SBS44, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter bmi_q, sig SBS44, perfect or near-perfect separation for category 1. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS44: 954\n",
      "All counts for signature SBS44: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR   p-value  p-value (corr)\n",
      "sex[T.Male]           0.255100      6.526580   1.159970  0.565604        1.000000\n",
      "country[T.Brazil]     0.448533   1248.408411   8.513457  0.102712        1.000000\n",
      "country[T.Canada]     0.018252    635.983037   3.406870  0.536215        1.000000\n",
      "country[T.Japan]      0.029321   1075.205307   5.611686  0.303551        1.000000\n",
      "country[T.Lithuania]  0.101139   3710.773803  19.346805  0.148619        1.000000\n",
      "country[T.Poland]     0.071310   2613.265854  13.669427  0.189027        1.000000\n",
      "country[T.Romania]    1.754575   3064.038226  22.057141  0.010722        1.000000\n",
      "country[T.Russia]     0.173350    495.749380   3.353344  0.306890        1.000000\n",
      "country[T.Serbia]     0.016582    586.366830   3.119037  0.403547        1.000000\n",
      "country[T.Thailand]   0.290937  12046.965490  58.830205  0.081755        1.000000\n",
      "country[T.UK]         0.929506   1630.349243  11.727611  0.039596        1.000000\n",
      "tobacco_ever[T.Yes]   0.396724     10.192017   1.797377  0.323900        1.000000\n",
      "bmi_q                 0.409065      1.702805   0.817265  0.404215        1.000000\n",
      "age_group             0.368823      1.487651   0.749124  0.319622        1.000000\n",
      "Intercept             0.000021      0.058329   0.003599  0.000001        0.000363\n",
      "Running logistic regression with parameter bmi_q, signature SBS1536A\n",
      "** Warning: Covariate country, sig SBS1536A, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536A: 480\n",
      "All counts for signature SBS1536A: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.346560  2.472810  1.821440  9.385281e-05    2.618493e-02\n",
      "country[T.Brazil]     0.175176  0.498834  0.297138  3.884923e-06    1.083893e-03\n",
      "country[T.Canada]     0.148388  0.475043  0.266955  6.639406e-06    1.852394e-03\n",
      "country[T.Japan]      0.028124  0.167576  0.071677  1.688718e-10    4.711523e-08\n",
      "country[T.Lithuania]  0.358308  3.827698  1.078748  8.827630e-01    1.000000e+00\n",
      "country[T.Poland]     0.314338  3.628016  1.029780  9.147110e-01    1.000000e+00\n",
      "country[T.Romania]    0.200358  0.677336  0.369284  1.314056e-03    3.666215e-01\n",
      "country[T.Russia]     0.207016  0.469360  0.312766  1.471867e-08    4.106508e-06\n",
      "country[T.Serbia]     0.285924  0.924561  0.515094  2.626860e-02    1.000000e+00\n",
      "country[T.Thailand]   0.000209  0.263874  0.027921  6.056726e-04    1.689827e-01\n",
      "country[T.UK]         0.281656  0.746750  0.459081  1.723972e-03    4.809882e-01\n",
      "tobacco_ever[T.Yes]   0.783273  1.424875  1.056419  7.041087e-01    1.000000e+00\n",
      "bmi_q                 0.939835  1.237694  1.078101  2.831651e-01    1.000000e+00\n",
      "age_group             1.778581  2.354962  2.041177  3.587920e-27    1.001030e-24\n",
      "Intercept             0.191341  0.544150  0.324249  1.759150e-05    4.908027e-03\n",
      "Running logistic regression with parameter bmi_q, signature SBS1536B\n",
      "Zero counts for signature SBS1536B: 480\n",
      "All counts for signature SBS1536B: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1263.7907 \n",
      "Link Function:         Logit             BIC:             -5255.4613\n",
      "Dependent Variable:    SBS1536B_bool     Log-Likelihood:  -616.90   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -665.42   \n",
      "No. Observations:      960               Deviance:        1233.8    \n",
      "Df Model:              14                Pearson chi2:    961.      \n",
      "Df Residuals:          945               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.0361   0.2500 -4.1451 0.0000 -1.5261 -0.5462\n",
      "sex[T.Male]          -0.0566   0.1455 -0.3887 0.6975 -0.3417  0.2286\n",
      "country[T.Brazil]    -0.6557   0.2621 -2.5021 0.0123 -1.1693 -0.1421\n",
      "country[T.Canada]    -0.4729   0.2821 -1.6766 0.0936 -1.0257  0.0799\n",
      "country[T.Japan]     -0.4799   0.3784 -1.2683 0.2047 -1.2216  0.2617\n",
      "country[T.Lithuania] -0.1120   0.5329 -0.2101 0.8336 -1.1564  0.9324\n",
      "country[T.Poland]    -0.3546   0.6119 -0.5795 0.5622 -1.5538  0.8446\n",
      "country[T.Romania]    0.2502   0.2945  0.8496 0.3956 -0.3270  0.8274\n",
      "country[T.Russia]     0.4186   0.1972  2.1225 0.0338  0.0321  0.8051\n",
      "country[T.Serbia]     0.1436   0.2881  0.4985 0.6182 -0.4210  0.7082\n",
      "country[T.Thailand]  -0.7763   0.9427 -0.8235 0.4102 -2.6239  1.0714\n",
      "country[T.UK]         0.3035   0.2383  1.2739 0.2027 -0.1634  0.7705\n",
      "tobacco_ever[T.Yes]   0.3747   0.1461  2.5652 0.0103  0.0884  0.6611\n",
      "bmi_q                -0.0890   0.0661 -1.3478 0.1777 -0.2185  0.0404\n",
      "age_group             0.5130   0.0648  7.9113 0.0000  0.3859  0.6401\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.217392  0.579143  0.354825  3.396817e-05    9.477119e-03\n",
      "sex[T.Male]           0.710540  1.256872  0.945017  6.975192e-01    1.000000e+00\n",
      "country[T.Brazil]     0.310570  0.867562  0.519075  1.234673e-02    1.000000e+00\n",
      "country[T.Canada]     0.358544  1.083208  0.623199  9.362289e-02    1.000000e+00\n",
      "country[T.Japan]      0.294752  1.299181  0.618819  2.046899e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.314630  2.540663  0.894075  8.335729e-01    1.000000e+00\n",
      "country[T.Poland]     0.211440  2.327117  0.701459  5.622298e-01    1.000000e+00\n",
      "country[T.Romania]    0.721079  2.287359  1.284277  3.955630e-01    1.000000e+00\n",
      "country[T.Russia]     1.032580  2.236972  1.519820  3.379317e-02    1.000000e+00\n",
      "country[T.Serbia]     0.656389  2.030281  1.154407  6.181599e-01    1.000000e+00\n",
      "country[T.Thailand]   0.072517  2.919375  0.460113  4.102406e-01    1.000000e+00\n",
      "country[T.UK]         0.849211  2.160822  1.354619  2.026853e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   1.092447  1.936849  1.454615  1.031073e-02    1.000000e+00\n",
      "bmi_q                 0.803688  1.041277  0.914802  1.777326e-01    1.000000e+00\n",
      "age_group             1.470985  1.896731  1.670348  2.547057e-15    7.106290e-13\n",
      "Running logistic regression with parameter bmi_q, signature SBS1536F\n",
      "** Warning: Covariate country, sig SBS1536F, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536F: 844\n",
      "All counts for signature SBS1536F: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.639617   1.468030  0.965897  8.540435e-01    1.000000e+00\n",
      "country[T.Brazil]     0.703585   2.974195  1.473399  2.936202e-01    1.000000e+00\n",
      "country[T.Canada]     1.371114   5.596017  2.793637  5.145961e-03    1.000000e+00\n",
      "country[T.Japan]      0.646064   5.100108  1.954707  2.167013e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.548907   7.306240  2.261164  2.299528e-01    1.000000e+00\n",
      "country[T.Poland]     0.002984   3.094760  0.386519  4.447665e-01    1.000000e+00\n",
      "country[T.Romania]    0.075491   1.212986  0.380309  1.093546e-01    1.000000e+00\n",
      "country[T.Russia]     0.906185   2.846947  1.597290  1.048750e-01    1.000000e+00\n",
      "country[T.Serbia]     0.293478   2.123334  0.857954  7.384576e-01    1.000000e+00\n",
      "country[T.Thailand]   0.290299  17.518992  3.015396  2.978328e-01    1.000000e+00\n",
      "country[T.UK]         0.749838   2.957318  1.508422  2.415629e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.472510   1.093176  0.720322  1.227121e-01    1.000000e+00\n",
      "bmi_q                 0.917281   1.348328  1.109291  2.852292e-01    1.000000e+00\n",
      "age_group             0.827957   1.181295  0.988222  8.772026e-01    1.000000e+00\n",
      "Intercept             0.050789   0.219256  0.107723  1.014096e-10    2.829329e-08\n",
      "Running logistic regression with parameter bmi_q, signature SBS1536I\n",
      "** Warning: Covariate country, sig SBS1536I, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536I: 866\n",
      "All counts for signature SBS1536I: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.469688    1.831592    0.927110  5.456594e-01    1.000000e+00\n",
      "country[T.Brazil]      0.472320    8.304466    2.112859  2.394116e-01    1.000000e+00\n",
      "country[T.Canada]      0.002490    3.043202    0.330190  3.810753e-01    1.000000e+00\n",
      "country[T.Japan]       0.135974    7.626972    1.391107  5.020672e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.418877   25.926175    4.430092  1.410947e-01    1.000000e+00\n",
      "country[T.Poland]      0.017764   25.292631    2.425142  4.169541e-01    1.000000e+00\n",
      "country[T.Romania]    70.818943  649.476127  195.920365  1.244313e-38    3.471634e-36\n",
      "country[T.Russia]      0.050929    2.616946    0.505214  3.322534e-01    1.000000e+00\n",
      "country[T.Serbia]     28.482698  246.231641   76.826201  6.020432e-24    1.679700e-21\n",
      "country[T.Thailand]    7.836441  408.395916   52.719498  1.000159e-04    2.790443e-02\n",
      "country[T.UK]          0.001410    1.695983    0.186450  1.276151e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.591438    2.377306    1.178020  4.471552e-01    1.000000e+00\n",
      "bmi_q                  0.655946    1.204198    0.889721  3.501208e-01    1.000000e+00\n",
      "age_group              1.548147    2.929801    2.100509  5.202903e-07    1.451610e-04\n",
      "Intercept              0.000862    0.015066    0.003943  4.276168e-22    1.193051e-19\n",
      "Running logistic regression with parameter bmi_q, signature DBS2\n",
      "Zero counts for signature DBS2: 560\n",
      "All counts for signature DBS2: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1268.7869 \n",
      "Link Function:         Logit             BIC:             -5250.4651\n",
      "Dependent Variable:    DBS2_bool         Log-Likelihood:  -619.39   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -652.03   \n",
      "No. Observations:      960               Deviance:        1238.8    \n",
      "Df Model:              14                Pearson chi2:    960.      \n",
      "Df Residuals:          945               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.1862   0.2507 -4.7322 0.0000 -1.6775 -0.6949\n",
      "sex[T.Male]           0.3231   0.1458  2.2155 0.0267  0.0373  0.6089\n",
      "country[T.Brazil]    -0.2487   0.2541 -0.9789 0.3276 -0.7466  0.2493\n",
      "country[T.Canada]    -0.4238   0.2793 -1.5171 0.1292 -0.9713  0.1237\n",
      "country[T.Japan]     -1.0054   0.3984 -2.5237 0.0116 -1.7863 -0.2246\n",
      "country[T.Lithuania] -0.0179   0.5328 -0.0336 0.9732 -1.0621  1.0263\n",
      "country[T.Poland]    -0.3428   0.6003 -0.5710 0.5680 -1.5193  0.8338\n",
      "country[T.Romania]    0.3932   0.2902  1.3550 0.1754 -0.1755  0.9620\n",
      "country[T.Russia]    -0.2765   0.1987 -1.3912 0.1642 -0.6660  0.1130\n",
      "country[T.Serbia]     0.3005   0.2823  1.0644 0.2872 -0.2529  0.8539\n",
      "country[T.Thailand]  -0.0692   0.9415 -0.0735 0.9414 -1.9146  1.7761\n",
      "country[T.UK]        -0.2272   0.2336 -0.9726 0.3307 -0.6851  0.2307\n",
      "tobacco_ever[T.Yes]   0.7524   0.1454  5.1735 0.0000  0.4674  1.0374\n",
      "bmi_q                -0.0408   0.0661 -0.6171 0.5371 -0.1703  0.0887\n",
      "age_group             0.2401   0.0625  3.8413 0.0001  0.1176  0.3626\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.186847  0.499131  0.305386  2.221341e-06        0.000620\n",
      "sex[T.Male]           1.037973  1.838431  1.381391  2.672332e-02        1.000000\n",
      "country[T.Brazil]     0.473959  1.283072  0.779823  3.276477e-01        1.000000\n",
      "country[T.Canada]     0.378605  1.131685  0.654569  1.292416e-01        1.000000\n",
      "country[T.Japan]      0.167581  0.798851  0.365886  1.161374e-02        1.000000\n",
      "country[T.Lithuania]  0.345733  2.790711  0.982263  9.732025e-01        1.000000\n",
      "country[T.Poland]     0.218871  2.301943  0.709809  5.679959e-01        1.000000\n",
      "country[T.Romania]    0.838999  2.616806  1.481721  1.754123e-01        1.000000\n",
      "country[T.Russia]     0.513769  1.119677  0.758456  1.641765e-01        1.000000\n",
      "country[T.Serbia]     0.776573  2.348799  1.350560  2.871599e-01        1.000000\n",
      "country[T.Thailand]   0.147399  5.907029  0.933108  9.413819e-01        1.000000\n",
      "country[T.UK]         0.504016  1.259441  0.796730  3.307293e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.595764  2.821987  2.122080  2.297783e-07        0.000064\n",
      "bmi_q                 0.843434  1.092776  0.960044  5.371371e-01        1.000000\n",
      "age_group             1.124794  1.437107  1.271397  1.223942e-04        0.034148\n",
      "Running logistic regression with parameter bmi_q, signature DBS4\n",
      "** Warning: Covariate country, sig DBS4, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS4: 871\n",
      "All counts for signature DBS4: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.643490  1.654276  1.026561  8.502087e-01    1.000000e+00\n",
      "country[T.Brazil]     0.295516  1.496596  0.698989  3.615793e-01    1.000000e+00\n",
      "country[T.Canada]     0.361528  1.895873  0.868163  7.280426e-01    1.000000e+00\n",
      "country[T.Japan]      0.094432  1.592153  0.483866  2.506607e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.929865  8.719307  3.000644  6.299830e-02    1.000000e+00\n",
      "country[T.Poland]     0.105827  4.455983  1.001613  8.485240e-01    1.000000e+00\n",
      "country[T.Romania]    0.158735  1.305395  0.510673  1.685529e-01    1.000000e+00\n",
      "country[T.Russia]     0.427312  1.458812  0.797878  4.564892e-01    1.000000e+00\n",
      "country[T.Serbia]     0.060427  0.952067  0.302617  3.919026e-02    1.000000e+00\n",
      "country[T.Thailand]   0.004113  5.120143  0.547565  6.411256e-01    1.000000e+00\n",
      "country[T.UK]         0.225105  1.126134  0.529784  9.989126e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.593009  1.529827  0.952479  7.784931e-01    1.000000e+00\n",
      "bmi_q                 0.867217  1.345147  1.076678  4.967413e-01    1.000000e+00\n",
      "age_group             1.077273  1.634689  1.322451  7.208606e-03    1.000000e+00\n",
      "Intercept             0.029314  0.154227  0.069100  2.719671e-12    7.587881e-10\n",
      "Running logistic regression with parameter bmi_q, signature DBS9\n",
      "** Warning: Covariate country, sig DBS9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS9: 925\n",
      "All counts for signature DBS9: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.747069   3.359563  1.542521  2.377885e-01    1.000000e+00\n",
      "country[T.Brazil]     0.326549   3.249511  1.120683  7.349542e-01    1.000000e+00\n",
      "country[T.Canada]     0.391752   4.040248  1.365039  5.931581e-01    1.000000e+00\n",
      "country[T.Japan]      0.002086   2.230305  0.271509  2.676674e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.224160  10.453546  2.184978  3.903699e-01    1.000000e+00\n",
      "country[T.Poland]     0.006574   7.361500  0.861632  7.717898e-01    1.000000e+00\n",
      "country[T.Romania]    0.001314   1.332315  0.169550  1.017997e-01    1.000000e+00\n",
      "country[T.Russia]     0.489784   2.912397  1.198986  6.417643e-01    1.000000e+00\n",
      "country[T.Serbia]     0.307542   3.974722  1.249501  6.737854e-01    1.000000e+00\n",
      "country[T.Thailand]   0.016030  21.891625  2.167861  5.891392e-01    1.000000e+00\n",
      "country[T.UK]         0.090766   1.650129  0.474240  2.455976e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.460410   1.912974  0.938276  7.313342e-01    1.000000e+00\n",
      "bmi_q                 0.728913   1.401755  1.004288  8.042389e-01    1.000000e+00\n",
      "age_group             0.812466   1.509723  1.102736  5.117520e-01    1.000000e+00\n",
      "Intercept             0.007925   0.098814  0.029958  1.676333e-10    4.676968e-08\n",
      "Running logistic regression with parameter bmi_q, signature DBS78C\n",
      "** Warning: Covariate country, sig DBS78C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78C: 867\n",
      "All counts for signature DBS78C: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.961990  2.513230  1.538333  7.202215e-02    1.000000e+00\n",
      "country[T.Brazil]     0.483759  2.388685  1.115913  7.588033e-01    1.000000e+00\n",
      "country[T.Canada]     0.971316  4.240966  2.064478  5.905395e-02    1.000000e+00\n",
      "country[T.Japan]      0.100401  1.720851  0.517156  3.056715e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.109295  4.508927  1.029403  8.678320e-01    1.000000e+00\n",
      "country[T.Poland]     0.002737  2.841074  0.354533  3.980920e-01    1.000000e+00\n",
      "country[T.Romania]    0.298324  2.144013  0.869964  7.527757e-01    1.000000e+00\n",
      "country[T.Russia]     0.481159  1.786781  0.936049  8.182058e-01    1.000000e+00\n",
      "country[T.Serbia]     0.373380  2.402577  1.011170  9.060775e-01    1.000000e+00\n",
      "country[T.Thailand]   0.006586  8.401815  0.880150  8.738450e-01    1.000000e+00\n",
      "country[T.UK]         0.695671  2.733715  1.398235  3.348950e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.766316  1.911872  1.206851  4.086064e-01    1.000000e+00\n",
      "bmi_q                 0.714530  1.082079  0.878125  2.187759e-01    1.000000e+00\n",
      "age_group             0.960300  1.425487  1.167454  1.201964e-01    1.000000e+00\n",
      "Intercept             0.028823  0.146300  0.066654  4.316610e-13    1.204334e-10\n",
      "Running logistic regression with parameter bmi_q, signature DBS78D\n",
      "** Warning: Covariate country, sig DBS78D, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78D: 903\n",
      "All counts for signature DBS78D: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.432541    1.562809   0.821689  4.999765e-01    1.000000e+00\n",
      "country[T.Brazil]      0.087573    4.485892   0.867858  7.158698e-01    1.000000e+00\n",
      "country[T.Canada]      0.337609    8.480256   1.943434  4.111318e-01    1.000000e+00\n",
      "country[T.Japan]       0.161938    8.869881   1.642537  5.455244e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.429613   25.772378   4.492553  1.599024e-01    1.000000e+00\n",
      "country[T.Poland]      0.721284   44.336221   7.593158  7.547899e-02    1.000000e+00\n",
      "country[T.Romania]    16.838298  127.544758  42.529405  2.725773e-19    7.604906e-17\n",
      "country[T.Russia]      0.317461    4.426815   1.215516  6.543036e-01    1.000000e+00\n",
      "country[T.Serbia]      3.621650   32.925875  10.315454  1.141338e-05    3.184334e-03\n",
      "country[T.Thailand]    0.017764   29.415436   2.497753  5.308839e-01    1.000000e+00\n",
      "country[T.UK]          0.190418    4.626854   1.082105  7.259489e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.416887    1.601016   0.819254  4.974690e-01    1.000000e+00\n",
      "bmi_q                  0.619832    1.117564   0.833546  2.113121e-01    1.000000e+00\n",
      "age_group              1.151590    2.035653   1.519204  2.708046e-03    7.555449e-01\n",
      "Intercept              0.003404    0.043829   0.013259  3.357435e-16    9.367244e-14\n",
      "Running logistic regression with parameter bmi_q, signature ID1\n",
      "Zero counts for signature ID1: 480\n",
      "All counts for signature ID1: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1306.7723 \n",
      "Link Function:         Logit             BIC:             -5212.4797\n",
      "Dependent Variable:    ID1_bool          Log-Likelihood:  -638.39   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -665.42   \n",
      "No. Observations:      960               Deviance:        1276.8    \n",
      "Df Model:              14                Pearson chi2:    961.      \n",
      "Df Residuals:          945               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.2038   0.2391 -0.8523 0.3941 -0.6725  0.2649\n",
      "sex[T.Male]           0.1959   0.1425  1.3751 0.1691 -0.0833  0.4752\n",
      "country[T.Brazil]     0.1557   0.2446  0.6364 0.5245 -0.3237  0.6351\n",
      "country[T.Canada]     0.5095   0.2775  1.8362 0.0663 -0.0344  1.0534\n",
      "country[T.Japan]     -0.1038   0.3683 -0.2818 0.7781 -0.8257  0.6181\n",
      "country[T.Lithuania] -0.7929   0.5626 -1.4094 0.1587 -1.8956  0.3097\n",
      "country[T.Poland]    -1.6066   0.7837 -2.0501 0.0404 -3.1426 -0.0707\n",
      "country[T.Romania]   -0.3808   0.2890 -1.3176 0.1876 -0.9473  0.1857\n",
      "country[T.Russia]     0.2506   0.1923  1.3029 0.1926 -0.1264  0.6276\n",
      "country[T.Serbia]    -0.4275   0.2864 -1.4926 0.1355 -0.9889  0.1338\n",
      "country[T.Thailand]   1.1209   1.1354  0.9872 0.3235 -1.1045  3.3462\n",
      "country[T.UK]        -0.1622   0.2298 -0.7059 0.4803 -0.6126  0.2882\n",
      "tobacco_ever[T.Yes]  -0.2874   0.1417 -2.0276 0.0426 -0.5652 -0.0096\n",
      "bmi_q                -0.1554   0.0647 -2.4010 0.0164 -0.2823 -0.0285\n",
      "age_group             0.2518   0.0608  4.1436 0.0000  0.1327  0.3709\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "Intercept             0.510434   1.303287  0.815624  0.394073        1.000000\n",
      "sex[T.Male]           0.920041   1.608313  1.216435  0.169109        1.000000\n",
      "country[T.Brazil]     0.723437   1.887127  1.168425  0.524525        1.000000\n",
      "country[T.Canada]     0.966227   2.867488  1.664525  0.066334        1.000000\n",
      "country[T.Japan]      0.437937   1.855366  0.901407  0.778081        1.000000\n",
      "country[T.Lithuania]  0.150233   1.363072  0.452525  0.158717        1.000000\n",
      "country[T.Poland]     0.043171   0.931767  0.200563  0.040350        1.000000\n",
      "country[T.Romania]    0.387771   1.204002  0.683284  0.187622        1.000000\n",
      "country[T.Russia]     0.881292   1.873053  1.284798  0.192592        1.000000\n",
      "country[T.Serbia]     0.371996   1.143217  0.652129  0.135530        1.000000\n",
      "country[T.Thailand]   0.331391  28.394122  3.067501  0.323541        1.000000\n",
      "country[T.UK]         0.541945   1.334009  0.850270  0.480281        1.000000\n",
      "tobacco_ever[T.Yes]   0.568241   0.990466  0.750215  0.042606        1.000000\n",
      "bmi_q                 0.754065   0.971857  0.856063  0.016351        1.000000\n",
      "age_group             1.141882   1.448970  1.286294  0.000034        0.009539\n",
      "Running logistic regression with parameter bmi_q, signature ID2\n",
      "** Warning: Covariate country, sig ID2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID2: 944\n",
      "All counts for signature ID2: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]           0.369776   3.012828  1.023460  0.581504        1.000000\n",
      "country[T.Brazil]     0.001738   2.078385  0.229645  0.176249        1.000000\n",
      "country[T.Canada]     0.002548   3.102177  0.337686  0.392213        1.000000\n",
      "country[T.Japan]      0.613010  17.257856  3.694218  0.113062        1.000000\n",
      "country[T.Lithuania]  0.010530  14.047215  1.419038  0.477182        1.000000\n",
      "country[T.Poland]     0.012278  16.206733  1.651915  0.503028        1.000000\n",
      "country[T.Romania]    0.330348   7.939085  1.868491  0.317595        1.000000\n",
      "country[T.Russia]     0.228255   3.204798  0.876706  0.541732        1.000000\n",
      "country[T.Serbia]     0.002408   2.923938  0.318998  0.284575        1.000000\n",
      "country[T.Thailand]   0.030003  48.982751  4.192896  0.320238        1.000000\n",
      "country[T.UK]         0.346795   5.879608  1.526593  0.385264        1.000000\n",
      "tobacco_ever[T.Yes]   0.252285   2.048410  0.728482  0.402147        1.000000\n",
      "bmi_q                 0.629235   1.630932  1.002425  0.561855        1.000000\n",
      "age_group             0.557851   1.328596  0.858046  0.376519        1.000000\n",
      "Intercept             0.006500   0.160672  0.036502  0.000002        0.000471\n",
      "Running logistic regression with parameter bmi_q, signature ID3\n",
      "** Warning: Covariate country, sig ID3, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID3: 919\n",
      "All counts for signature ID3: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           0.471295   1.827181  0.917659  7.028840e-01    1.000000e+00\n",
      "country[T.Brazil]     0.254841   3.379363  1.041028  7.277895e-01    1.000000e+00\n",
      "country[T.Canada]     0.164961   3.257410  0.884852  8.409051e-01    1.000000e+00\n",
      "country[T.Japan]      0.271172   5.766745  1.495901  5.454562e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.006393   7.319354  0.841145  7.193727e-01    1.000000e+00\n",
      "country[T.Poland]     0.287793  14.364210  2.862166  2.847142e-01    1.000000e+00\n",
      "country[T.Romania]    2.383619  15.058336  5.919203  1.640669e-04    4.577466e-02\n",
      "country[T.Russia]     0.311901   2.555067  0.919343  7.271411e-01    1.000000e+00\n",
      "country[T.Serbia]     0.061615   2.647340  0.585586  4.809871e-01    1.000000e+00\n",
      "country[T.Thailand]   0.772262  54.427408  8.437241  6.996137e-02    1.000000e+00\n",
      "country[T.UK]         0.385159   3.512639  1.220294  6.113386e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.869829   3.458802  1.711895  1.122377e-01    1.000000e+00\n",
      "bmi_q                 0.651113   1.184831  0.875976  3.630872e-01    1.000000e+00\n",
      "age_group             0.840316   1.495660  1.116578  4.200504e-01    1.000000e+00\n",
      "Intercept             0.009083   0.092849  0.030829  7.162182e-12    1.998249e-09\n",
      "Running logistic regression with parameter bmi_q, signature ID5\n",
      "Zero counts for signature ID5: 480\n",
      "All counts for signature ID5: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1146.1444 \n",
      "Link Function:         Logit             BIC:             -5373.1076\n",
      "Dependent Variable:    ID5_bool          Log-Likelihood:  -558.07   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -665.42   \n",
      "No. Observations:      960               Deviance:        1116.1    \n",
      "Df Model:              14                Pearson chi2:    964.      \n",
      "Df Residuals:          945               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.5498   0.2714 -5.7098 0.0000 -2.0818 -1.0178\n",
      "sex[T.Male]           0.4956   0.1566  3.1653 0.0015  0.1887  0.8025\n",
      "country[T.Brazil]    -0.9367   0.2700 -3.4695 0.0005 -1.4659 -0.4076\n",
      "country[T.Canada]    -0.8375   0.2978 -2.8121 0.0049 -1.4212 -0.2538\n",
      "country[T.Japan]     -2.7565   0.5031 -5.4788 0.0000 -3.7425 -1.7704\n",
      "country[T.Lithuania] -0.1237   0.5658 -0.2187 0.8269 -1.2326  0.9852\n",
      "country[T.Poland]    -0.9854   0.6677 -1.4757 0.1400 -2.2941  0.3234\n",
      "country[T.Romania]    0.3394   0.3304  1.0273 0.3043 -0.3081  0.9869\n",
      "country[T.Russia]    -0.4576   0.2066 -2.2151 0.0268 -0.8625 -0.0527\n",
      "country[T.Serbia]    -0.0428   0.3046 -0.1406 0.8882 -0.6399  0.5542\n",
      "country[T.Thailand]  -2.3576   1.1578 -2.0362 0.0417 -4.6269 -0.0883\n",
      "country[T.UK]        -0.3721   0.2486 -1.4967 0.1345 -0.8593  0.1152\n",
      "tobacco_ever[T.Yes]  -0.0985   0.1541 -0.6392 0.5227 -0.4006  0.2036\n",
      "bmi_q                 0.0458   0.0703  0.6515 0.5147 -0.0920  0.1836\n",
      "age_group             0.8236   0.0740 11.1283 0.0000  0.6785  0.9686\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.124705  0.361382  0.212288  1.130959e-08    3.155374e-06\n",
      "sex[T.Male]           1.207730  2.231199  1.641550  1.548989e-03    4.321679e-01\n",
      "country[T.Brazil]     0.230864  0.665273  0.391903  5.215189e-04    1.455038e-01\n",
      "country[T.Canada]     0.241435  0.775864  0.432806  4.921532e-03    1.000000e+00\n",
      "country[T.Japan]      0.023694  0.170270  0.063516  4.282623e-08    1.194852e-05\n",
      "country[T.Lithuania]  0.291521  2.678231  0.883607  8.268731e-01    1.000000e+00\n",
      "country[T.Poland]     0.100852  1.381791  0.373304  1.400320e-01    1.000000e+00\n",
      "country[T.Romania]    0.734817  2.682824  1.404060  3.043009e-01    1.000000e+00\n",
      "country[T.Russia]     0.422110  0.948664  0.632804  2.675507e-02    1.000000e+00\n",
      "country[T.Serbia]     0.527368  1.740573  0.958083  8.882058e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009785  0.915465  0.094646  4.172553e-02    1.000000e+00\n",
      "country[T.UK]         0.423467  1.122047  0.689311  1.344653e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.669894  1.225796  0.906175  5.227125e-01    1.000000e+00\n",
      "bmi_q                 0.912094  1.201580  1.046878  5.147307e-01    1.000000e+00\n",
      "age_group             1.970932  2.634264  2.278587  9.140054e-29    2.550075e-26\n",
      "Running logistic regression with parameter bmi_q, signature ID8\n",
      "Zero counts for signature ID8: 260\n",
      "All counts for signature ID8: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1056.5673 \n",
      "Link Function:         Logit             BIC:             -5462.6847\n",
      "Dependent Variable:    ID8_bool          Log-Likelihood:  -513.28   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -560.72   \n",
      "No. Observations:      960               Deviance:        1026.6    \n",
      "Df Model:              14                Pearson chi2:    954.      \n",
      "Df Residuals:          945               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept             0.0256   0.2705  0.0945 0.9247 -0.5046  0.5557\n",
      "sex[T.Male]           0.4094   0.1620  2.5277 0.0115  0.0920  0.7269\n",
      "country[T.Brazil]    -0.7840   0.2744 -2.8573 0.0043 -1.3218 -0.2462\n",
      "country[T.Canada]    -0.2564   0.3325 -0.7713 0.4405 -0.9081  0.3952\n",
      "country[T.Japan]     -1.4834   0.4010 -3.6997 0.0002 -2.2692 -0.6976\n",
      "country[T.Lithuania]  0.4435   0.7855  0.5646 0.5723 -1.0960  1.9830\n",
      "country[T.Poland]    -0.2793   0.6478 -0.4312 0.6663 -1.5490  0.9903\n",
      "country[T.Romania]   -0.6853   0.3276 -2.0920 0.0364 -1.3273 -0.0432\n",
      "country[T.Russia]    -0.1179   0.2280 -0.5170 0.6052 -0.5647  0.3290\n",
      "country[T.Serbia]    -0.6879   0.3093 -2.2244 0.0261 -1.2941 -0.0818\n",
      "country[T.Thailand]  -1.2792   0.9443 -1.3547 0.1755 -3.1299  0.5715\n",
      "country[T.UK]        -0.2487   0.2821 -0.8815 0.3781 -0.8015  0.3042\n",
      "tobacco_ever[T.Yes]  -0.0114   0.1621 -0.0704 0.9439 -0.3292  0.3063\n",
      "bmi_q                 0.0282   0.0738  0.3825 0.7021 -0.1163  0.1728\n",
      "age_group             0.5563   0.0717  7.7571 0.0000  0.4158  0.6969\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.603745  1.743229  1.025898  9.246947e-01    1.000000e+00\n",
      "sex[T.Male]           1.096311  2.068589  1.505927  1.148239e-02    1.000000e+00\n",
      "country[T.Brazil]     0.266654  0.781748  0.456570  4.272356e-03    1.000000e+00\n",
      "country[T.Canada]     0.403301  1.484662  0.773800  4.405149e-01    1.000000e+00\n",
      "country[T.Japan]      0.103390  0.497803  0.226865  2.158497e-04    6.022208e-02\n",
      "country[T.Lithuania]  0.334198  7.264403  1.558124  5.723435e-01    1.000000e+00\n",
      "country[T.Poland]     0.212464  2.692015  0.756278  6.662991e-01    1.000000e+00\n",
      "country[T.Romania]    0.265199  0.957676  0.503959  3.643969e-02    1.000000e+00\n",
      "country[T.Russia]     0.568503  1.389591  0.888812  6.051807e-01    1.000000e+00\n",
      "country[T.Serbia]     0.274158  0.921464  0.502620  2.611872e-02    1.000000e+00\n",
      "country[T.Thailand]   0.043724  1.771002  0.278272  1.755210e-01    1.000000e+00\n",
      "country[T.UK]         0.448638  1.355587  0.779851  3.780677e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.719532  1.358424  0.988650  9.438668e-01    1.000000e+00\n",
      "bmi_q                 0.890166  1.188591  1.028612  7.021005e-01    1.000000e+00\n",
      "age_group             1.515514  2.007482  1.744238  8.690070e-15    2.424529e-12\n",
      "Running logistic regression with parameter bmi_q, signature ID9\n",
      "** Warning: Covariate country, sig ID9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter bmi_q, sig ID9, perfect or near-perfect separation for category 1. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID9: 956\n",
      "All counts for signature ID9: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%       97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]           0.185411   12.103589  1.188608  0.654725        1.000000\n",
      "country[T.Brazil]     0.004294    7.424775  0.595136  0.481591        1.000000\n",
      "country[T.Canada]     0.163415   15.249140  1.878960  0.524037        1.000000\n",
      "country[T.Japan]      0.006462   14.567439  0.949102  0.639376        1.000000\n",
      "country[T.Lithuania]  0.022756   47.327568  3.276758  0.362041        1.000000\n",
      "country[T.Poland]     0.024397   49.094687  3.518040  0.367295        1.000000\n",
      "country[T.Romania]    0.006207   10.681556  0.858900  0.514717        1.000000\n",
      "country[T.Russia]     0.002185    4.140735  0.308982  0.338276        1.000000\n",
      "country[T.Serbia]     0.215324   21.542932  2.555666  0.349790        1.000000\n",
      "country[T.Thailand]   0.062227  165.331730  9.422135  0.230020        1.000000\n",
      "country[T.UK]         0.002832    5.051977  0.395607  0.357766        1.000000\n",
      "tobacco_ever[T.Yes]   0.314222   25.130651  2.177803  0.325858        1.000000\n",
      "bmi_q                 0.371324    2.319819  0.893218  0.633967        1.000000\n",
      "age_group             0.546135    3.214880  1.243657  0.529415        1.000000\n",
      "Intercept             0.000086    0.088408  0.005026  0.000026        0.007201\n",
      "Running logistic regression with parameter bmi_q, signature ID11\n",
      "** Warning: Covariate country, sig ID11, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter bmi_q, sig ID11, perfect or near-perfect separation for category 2. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID11: 952\n",
      "All counts for signature ID11: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]           0.133001   2.357359  0.595434  0.378471          1.0000\n",
      "country[T.Brazil]     0.002536   3.908732  0.348758  0.382024          1.0000\n",
      "country[T.Canada]     0.171241  13.047316  1.903303  0.492275          1.0000\n",
      "country[T.Japan]      0.011241  18.552238  1.564973  0.571985          1.0000\n",
      "country[T.Lithuania]  0.009788  17.264332  1.388300  0.453476          1.0000\n",
      "country[T.Poland]     0.036407  68.694136  5.166811  0.308111          1.0000\n",
      "country[T.Romania]    0.003603   5.273813  0.490467  0.500398          1.0000\n",
      "country[T.Russia]     0.209428   5.435973  1.063790  0.621547          1.0000\n",
      "country[T.Serbia]     0.004353   6.379214  0.592201  0.522938          1.0000\n",
      "country[T.Thailand]   0.021681  43.010133  3.127044  0.391277          1.0000\n",
      "country[T.UK]         0.105196   7.261744  1.124700  0.658519          1.0000\n",
      "tobacco_ever[T.Yes]   0.022751   1.164667  0.224118  0.066415          1.0000\n",
      "bmi_q                 0.645989   2.435618  1.214120  0.448315          1.0000\n",
      "age_group             0.691513   2.530577  1.273696  0.380130          1.0000\n",
      "Intercept             0.000890   0.115395  0.013607  0.000009          0.0024\n",
      "Running logistic regression with parameter bmi_q, signature ID12\n",
      "** Warning: Covariate country, sig ID12, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID12: 953\n",
      "All counts for signature ID12: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR   p-value  p-value (corr)\n",
      "sex[T.Male]           0.136571      2.579631   0.616679  0.380073        1.000000\n",
      "country[T.Brazil]     1.402751   2072.029956  15.223259  0.017008        1.000000\n",
      "country[T.Canada]     0.023374    820.342647   4.377946  0.456402        1.000000\n",
      "country[T.Japan]      0.037137   1395.612826   7.196651  0.229911        1.000000\n",
      "country[T.Lithuania]  0.123640   4657.956485  23.948717  0.118623        1.000000\n",
      "country[T.Poland]     0.048410   1971.460081   9.843988  0.208320        1.000000\n",
      "country[T.Romania]    0.018370    654.288923   3.472018  0.249846        1.000000\n",
      "country[T.Russia]     0.508214    736.610185   5.431431  0.136902        1.000000\n",
      "country[T.Serbia]     0.011804    424.767838   2.242260  0.377334        1.000000\n",
      "country[T.Thailand]   0.296222  13225.407365  62.261285  0.071296        1.000000\n",
      "country[T.UK]         0.449100   1261.348592   8.575649  0.091808        1.000000\n",
      "tobacco_ever[T.Yes]   0.120538      2.840512   0.667886  0.448094        1.000000\n",
      "bmi_q                 0.400975      1.482905   0.766759  0.304742        1.000000\n",
      "age_group             0.209974      0.913978   0.468577  0.020666        1.000000\n",
      "Intercept             0.000133      0.238403   0.019596  0.000461        0.128481\n",
      "Running logistic regression with parameter bmi_q, signature ID83C\n",
      "** Warning: Covariate country, sig ID83C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID83C: 943\n",
      "All counts for signature ID83C: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%         97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]            0.390949      3.565619    1.147688  6.456781e-01    1.000000e+00\n",
      "country[T.Brazil]      0.015916    548.940734    2.955817  4.499087e-01    1.000000e+00\n",
      "country[T.Canada]      0.542695   1557.305426   10.522712  1.134510e-01    1.000000e+00\n",
      "country[T.Japan]       0.034119   1216.814312    6.443326  2.963793e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.072158   2601.048868   13.699588  1.847014e-01    1.000000e+00\n",
      "country[T.Poland]      0.136069   4884.714155   25.762568  1.321105e-01    1.000000e+00\n",
      "country[T.Romania]    16.696750  16683.288026  129.569066  2.044699e-10    5.704710e-08\n",
      "country[T.Russia]      0.007898    274.019152    1.471065  6.223331e-01    1.000000e+00\n",
      "country[T.Serbia]      3.096506   4492.670966   33.088733  2.281902e-03    6.366506e-01\n",
      "country[T.Thailand]    0.184141   7231.926867   36.475521  1.135459e-01    1.000000e+00\n",
      "country[T.UK]          0.012016    413.701459    2.229580  5.140847e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.243581      2.383619    0.777674  5.225106e-01    1.000000e+00\n",
      "bmi_q                  0.647229      1.787571    1.060712  6.469751e-01    1.000000e+00\n",
      "age_group              0.929730      2.487164    1.481884  9.240974e-02    1.000000e+00\n",
      "Intercept              0.000005      0.010209    0.000790  9.546411e-15    2.663449e-12\n",
      "Running logistic regression with parameter bmi_q, signature SBS_burden\n",
      "** Warning: Covariate country, sig SBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS_burden: 481\n",
      "All counts for signature SBS_burden: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.445977   2.756741  1.991254  2.158398e-05    6.021932e-03\n",
      "country[T.Brazil]     0.330603   0.982284  0.571763  4.285400e-02    1.000000e+00\n",
      "country[T.Canada]     0.185532   0.630943  0.344857  5.145174e-04    1.435504e-01\n",
      "country[T.Japan]      0.172207   0.903714  0.394182  2.801450e-02    1.000000e+00\n",
      "country[T.Lithuania]  0.303956   2.743712  0.899107  8.359267e-01    1.000000e+00\n",
      "country[T.Poland]     0.190620   2.500407  0.714744  5.909723e-01    1.000000e+00\n",
      "country[T.Romania]    3.425807  18.987609  7.650035  1.002364e-07    2.796595e-05\n",
      "country[T.Russia]     0.489403   1.133494  0.745206  1.690577e-01    1.000000e+00\n",
      "country[T.Serbia]     0.784489   2.716983  1.454020  2.346951e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000242   0.344620  0.033175  2.098188e-03    5.853944e-01\n",
      "country[T.UK]         0.468864   1.281702  0.775220  3.188413e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.860261   1.615189  1.178020  3.011973e-01    1.000000e+00\n",
      "bmi_q                 0.785034   1.046260  0.906437  1.794366e-01    1.000000e+00\n",
      "age_group             2.372024   3.264613  2.771492  1.660068e-47    4.631590e-45\n",
      "Intercept             0.063114   0.197444  0.112735  2.193983e-15    6.121212e-13\n",
      "Running logistic regression with parameter bmi_q, signature DBS_burden\n",
      "** Warning: Covariate country, sig DBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS_burden: 530\n",
      "All counts for signature DBS_burden: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]           1.160598  2.186135  1.589975  3.811146e-03    1.000000e+00\n",
      "country[T.Brazil]     0.380268  1.074204  0.640937  9.141805e-02    1.000000e+00\n",
      "country[T.Canada]     0.230554  0.744256  0.417169  2.978212e-03    8.309211e-01\n",
      "country[T.Japan]      0.090616  0.470523  0.211624  1.129329e-04    3.150829e-02\n",
      "country[T.Lithuania]  0.538904  5.079717  1.566260  4.153852e-01    1.000000e+00\n",
      "country[T.Poland]     0.198455  2.258823  0.693628  5.398315e-01    1.000000e+00\n",
      "country[T.Romania]    1.784090  8.584144  3.768596  3.560886e-04    9.934872e-02\n",
      "country[T.Russia]     0.503145  1.160244  0.764465  2.068528e-01    1.000000e+00\n",
      "country[T.Serbia]     0.743290  2.416190  1.335422  3.335526e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000354  0.454774  0.047437  4.873448e-03    1.000000e+00\n",
      "country[T.UK]         0.622766  1.832823  1.064339  8.146629e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.800470  1.493467  1.092890  5.658211e-01    1.000000e+00\n",
      "bmi_q                 0.862687  1.145132  0.993792  9.293767e-01    1.000000e+00\n",
      "age_group             1.779536  2.380960  2.052532  8.322958e-26    2.322105e-23\n",
      "Intercept             0.119122  0.349223  0.205484  2.466111e-09    6.880449e-07\n",
      "Running logistic regression with parameter bmi_q, signature ID_burden\n",
      "Zero counts for signature ID_burden: 482\n",
      "All counts for signature ID_burden: 960\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1113.6974 \n",
      "Link Function:         Logit             BIC:             -5405.5546\n",
      "Dependent Variable:    ID_burden_bool    Log-Likelihood:  -541.85   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -665.41   \n",
      "No. Observations:      960               Deviance:        1083.7    \n",
      "Df Model:              14                Pearson chi2:    959.      \n",
      "Df Residuals:          945               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.8473   0.2805 -6.5865 0.0000 -2.3970 -1.2976\n",
      "sex[T.Male]           0.6051   0.1605  3.7697 0.0002  0.2905  0.9197\n",
      "country[T.Brazil]    -0.8416   0.2734 -3.0779 0.0021 -1.3774 -0.3057\n",
      "country[T.Canada]    -1.0896   0.3071 -3.5484 0.0004 -1.6914 -0.4878\n",
      "country[T.Japan]     -2.5990   0.4868 -5.3391 0.0000 -3.5531 -1.6449\n",
      "country[T.Lithuania] -0.6706   0.5577 -1.2025 0.2292 -1.7636  0.4224\n",
      "country[T.Poland]    -0.9049   0.6791 -1.3324 0.1827 -2.2359  0.4262\n",
      "country[T.Romania]    0.8067   0.3551  2.2715 0.0231  0.1106  1.5027\n",
      "country[T.Russia]    -0.3353   0.2098 -1.5983 0.1100 -0.7464  0.0759\n",
      "country[T.Serbia]    -0.0900   0.3102 -0.2902 0.7717 -0.6980  0.5179\n",
      "country[T.Thailand]  -2.3691   1.1646 -2.0342 0.0419 -4.6518 -0.0865\n",
      "country[T.UK]        -0.2850   0.2529 -1.1270 0.2598 -0.7807  0.2107\n",
      "tobacco_ever[T.Yes]  -0.1793   0.1570 -1.1425 0.2532 -0.4870  0.1283\n",
      "bmi_q                 0.0601   0.0717  0.8377 0.4022 -0.0805  0.2007\n",
      "age_group             0.9190   0.0774 11.8723 0.0000  0.7673  1.0708\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.090988  0.273186  0.157660  4.503546e-11    1.256489e-08\n",
      "sex[T.Male]           1.337069  2.508439  1.831381  1.634338e-04    4.559803e-02\n",
      "country[T.Brazil]     0.252221  0.736628  0.431037  2.084442e-03    5.815594e-01\n",
      "country[T.Canada]     0.184257  0.614003  0.336355  3.875507e-04    1.081267e-01\n",
      "country[T.Japan]      0.028635  0.193025  0.074346  9.339597e-08    2.605747e-05\n",
      "country[T.Lithuania]  0.171434  1.525622  0.511413  2.291701e-01    1.000000e+00\n",
      "country[T.Poland]     0.106893  1.531411  0.404596  1.827272e-01    1.000000e+00\n",
      "country[T.Romania]    1.116983  4.493752  2.240412  2.311711e-02    1.000000e+00\n",
      "country[T.Russia]     0.474072  1.078811  0.715146  1.099714e-01    1.000000e+00\n",
      "country[T.Serbia]     0.497588  1.678582  0.913916  7.716670e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009544  0.917153  0.093561  4.192954e-02    1.000000e+00\n",
      "country[T.UK]         0.458099  1.234500  0.752013  2.597635e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.614492  1.136901  0.835833  2.532408e-01    1.000000e+00\n",
      "bmi_q                 0.922654  1.222224  1.061927  4.022138e-01    1.000000e+00\n",
      "age_group             2.153987  2.917612  2.506891  1.649053e-32    4.600859e-30\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using below/above median model for signature SBS1, its frequency is 0.77\n",
      "Using below/above median model for signature SBS1536A, its frequency is 0.86\n",
      "Using below/above median model for signature SBS1536B, its frequency is 0.90\n",
      "Using below/above median model for signature ID1, its frequency is 0.86\n",
      "Using below/above median model for signature ID5, its frequency is 0.93\n",
      "Running logistic regression with parameter hypert, signature SBS1\n",
      "Zero counts for signature SBS1: 479\n",
      "All counts for signature SBS1: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1256.1760 \n",
      "Link Function:         Logit             BIC:             -5239.5268\n",
      "Dependent Variable:    SBS1_bool         Log-Likelihood:  -613.09   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -663.34   \n",
      "No. Observations:      957               Deviance:        1226.2    \n",
      "Df Model:              14                Pearson chi2:    955.      \n",
      "Df Residuals:          942               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.9841   0.2248 -4.3779 0.0000 -1.4247 -0.5435\n",
      "hypert[T.Yes]         0.0344   0.1424  0.2419 0.8088 -0.2446  0.3134\n",
      "sex[T.Male]          -0.0517   0.1460 -0.3539 0.7234 -0.3378  0.2345\n",
      "country[T.Brazil]     0.1235   0.2477  0.4986 0.6180 -0.3620  0.6090\n",
      "country[T.Canada]     0.7858   0.2889  2.7201 0.0065  0.2196  1.3520\n",
      "country[T.Japan]      0.2806   0.3736  0.7511 0.4526 -0.4516  1.0128\n",
      "country[T.Lithuania]  0.4062   0.5522  0.7357 0.4619 -0.6760  1.4884\n",
      "country[T.Poland]    -0.9429   0.6862 -1.3740 0.1695 -2.2879  0.4021\n",
      "country[T.Romania]   -1.0109   0.3186 -3.1732 0.0015 -1.6353 -0.3865\n",
      "country[T.Russia]     0.5007   0.1954  2.5625 0.0104  0.1177  0.8837\n",
      "country[T.Serbia]    -0.7552   0.3075 -2.4559 0.0141 -1.3580 -0.1525\n",
      "country[T.Thailand]   0.1657   0.9361  0.1770 0.8595 -1.6690  2.0004\n",
      "country[T.UK]         0.4256   0.2348  1.8123 0.0699 -0.0347  0.8859\n",
      "tobacco_ever[T.Yes]   0.0142   0.1456  0.0974 0.9224 -0.2711  0.2995\n",
      "age_group             0.4367   0.0654  6.6777 0.0000  0.3085  0.5649\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.240571  0.580684  0.373759  1.198206e-05    3.342995e-03\n",
      "hypert[T.Yes]         0.783040  1.368131  1.035037  8.088468e-01    1.000000e+00\n",
      "sex[T.Male]           0.713352  1.264213  0.949647  7.234008e-01    1.000000e+00\n",
      "country[T.Brazil]     0.696298  1.838589  1.131462  6.180436e-01    1.000000e+00\n",
      "country[T.Canada]     1.245559  3.865158  2.194147  6.526884e-03    1.000000e+00\n",
      "country[T.Japan]      0.636611  2.753215  1.323906  4.525945e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.508644  4.430210  1.501133  4.619232e-01    1.000000e+00\n",
      "country[T.Poland]     0.101479  1.495026  0.389505  1.694537e-01    1.000000e+00\n",
      "country[T.Romania]    0.194888  0.679424  0.363884  1.507782e-03    4.206713e-01\n",
      "country[T.Russia]     1.124938  2.419798  1.649885  1.039345e-02    1.000000e+00\n",
      "country[T.Serbia]     0.257184  0.858560  0.469902  1.405479e-02    1.000000e+00\n",
      "country[T.Thailand]   0.188436  7.391898  1.180210  8.595035e-01    1.000000e+00\n",
      "country[T.UK]         0.965919  2.425145  1.530521  6.993799e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.762517  1.349154  1.014274  9.224351e-01    1.000000e+00\n",
      "age_group             1.361425  1.759248  1.547606  2.426709e-11    6.770517e-09\n",
      "Running logistic regression with parameter hypert, signature SBS2\n",
      "** Warning: Covariate country, sig SBS2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS2: 951\n",
      "All counts for signature SBS2: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%       97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.225929    5.454423  1.093360  5.806840e-01        1.000000\n",
      "sex[T.Male]           0.250267    7.187291  1.198388  5.540651e-01        1.000000\n",
      "country[T.Brazil]     0.003971    6.885492  0.550646  4.711811e-01        1.000000\n",
      "country[T.Canada]     0.005022    8.871604  0.699260  7.327046e-01        1.000000\n",
      "country[T.Japan]      0.009614   17.636123  1.352278  4.739533e-01        1.000000\n",
      "country[T.Lithuania]  0.023055   42.370961  3.241573  3.413562e-01        1.000000\n",
      "country[T.Poland]     0.027031   52.614476  3.840901  3.115570e-01        1.000000\n",
      "country[T.Romania]    0.005936   10.351468  0.824192  5.412850e-01        1.000000\n",
      "country[T.Russia]     0.178133    8.372665  1.217092  5.378732e-01        1.000000\n",
      "country[T.Serbia]     0.005489    9.827568  0.766941  4.771912e-01        1.000000\n",
      "country[T.Thailand]   0.059792  143.980133  8.917408  2.020231e-01        1.000000\n",
      "country[T.UK]         0.347535   15.060320  2.289290  2.521959e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.185848    4.877362  0.943370  5.391946e-01        1.000000\n",
      "age_group             0.488568    2.123862  1.013366  5.976375e-01        1.000000\n",
      "Intercept             0.000626    0.078624  0.009747  8.557032e-08        0.000024\n",
      "Running logistic regression with parameter hypert, signature SBS4\n",
      "Zero counts for signature SBS4: 407\n",
      "All counts for signature SBS4: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1245.9192 \n",
      "Link Function:         Logit             BIC:             -5249.7836\n",
      "Dependent Variable:    SBS4_bool         Log-Likelihood:  -607.96   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -652.62   \n",
      "No. Observations:      957               Deviance:        1215.9    \n",
      "Df Model:              14                Pearson chi2:    951.      \n",
      "Df Residuals:          942               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.3577   0.2224 -1.6081 0.1078 -0.7936  0.0783\n",
      "hypert[T.Yes]         0.2233   0.1431  1.5607 0.1186 -0.0571  0.5036\n",
      "sex[T.Male]          -0.1850   0.1467 -1.2611 0.2073 -0.4724  0.1025\n",
      "country[T.Brazil]    -0.6495   0.2517 -2.5807 0.0099 -1.1427 -0.1562\n",
      "country[T.Canada]    -0.5537   0.2796 -1.9805 0.0476 -1.1017 -0.0058\n",
      "country[T.Japan]      0.8271   0.4742  1.7442 0.0811 -0.1023  1.7565\n",
      "country[T.Lithuania] -0.3090   0.5541 -0.5575 0.5772 -1.3951  0.7771\n",
      "country[T.Poland]     0.7633   0.6939  1.1000 0.2713 -0.5967  2.1233\n",
      "country[T.Romania]   -0.8842   0.2933 -3.0147 0.0026 -1.4590 -0.3093\n",
      "country[T.Russia]    -0.3555   0.1962 -1.8118 0.0700 -0.7400  0.0291\n",
      "country[T.Serbia]    -0.5776   0.2844 -2.0309 0.0423 -1.1351 -0.0202\n",
      "country[T.Thailand]  -0.3469   0.9324 -0.3721 0.7098 -2.1743  1.4805\n",
      "country[T.UK]        -0.0182   0.2453 -0.0742 0.9408 -0.4991  0.4627\n",
      "tobacco_ever[T.Yes]   0.6574   0.1465  4.4881 0.0000  0.3703  0.9444\n",
      "age_group             0.3173   0.0644  4.9228 0.0000  0.1909  0.4436\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.452203  1.081407  0.699296  1.078131e-01        1.000000\n",
      "hypert[T.Yes]         0.944489  1.654742  1.250154  1.185872e-01        1.000000\n",
      "sex[T.Male]           0.623486  1.107938  0.831134  2.072718e-01        1.000000\n",
      "country[T.Brazil]     0.318959  0.855377  0.522331  9.859997e-03        1.000000\n",
      "country[T.Canada]     0.332307  0.994262  0.574805  4.764198e-02        1.000000\n",
      "country[T.Japan]      0.902736  5.791982  2.286619  8.112862e-02        1.000000\n",
      "country[T.Lithuania]  0.247820  2.175244  0.734213  5.771587e-01        1.000000\n",
      "country[T.Poland]     0.550604  8.358428  2.145270  2.713425e-01        1.000000\n",
      "country[T.Romania]    0.232474  0.733927  0.413061  2.571924e-03        0.717567\n",
      "country[T.Russia]     0.477096  1.029505  0.700837  7.002261e-02        1.000000\n",
      "country[T.Serbia]     0.321403  0.980029  0.561235  4.226585e-02        1.000000\n",
      "country[T.Thailand]   0.113686  4.395012  0.706860  7.098254e-01        1.000000\n",
      "country[T.UK]         0.607095  1.588284  0.981957  9.408407e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.448164  2.571388  1.929713  7.185742e-06        0.002005\n",
      "age_group             1.210393  1.558258  1.373355  8.530261e-07        0.000238\n",
      "Running logistic regression with parameter hypert, signature SBS5\n",
      "Zero counts for signature SBS5: 882\n",
      "All counts for signature SBS5: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             501.8669  \n",
      "Link Function:         Logit             BIC:             -5993.8359\n",
      "Dependent Variable:    SBS5_bool         Log-Likelihood:  -235.93   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -262.95   \n",
      "No. Observations:      957               Deviance:        471.87    \n",
      "Df Model:              14                Pearson chi2:    947.      \n",
      "Df Residuals:          942               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -3.8039   0.4686 -8.1169 0.0000 -4.7225 -2.8854\n",
      "hypert[T.Yes]         0.3589   0.2643  1.3579 0.1745 -0.1592  0.8770\n",
      "sex[T.Male]          -0.0961   0.2713 -0.3544 0.7230 -0.6279  0.4356\n",
      "country[T.Brazil]     0.2188   0.3791  0.5772 0.5638 -0.5243  0.9619\n",
      "country[T.Canada]    -0.8901   0.5605 -1.5881 0.1123 -1.9886  0.2084\n",
      "country[T.Japan]      0.2008   0.5103  0.3934 0.6940 -0.7994  1.2010\n",
      "country[T.Lithuania]  0.1215   0.8019  0.1515 0.8796 -1.4501  1.6931\n",
      "country[T.Poland]    -0.1938   1.0918 -0.1775 0.8591 -2.3336  1.9461\n",
      "country[T.Romania]   -1.3251   0.7525 -1.7610 0.0782 -2.7999  0.1497\n",
      "country[T.Russia]    -1.1200   0.4698 -2.3840 0.0171 -2.0407 -0.1992\n",
      "country[T.Serbia]    -0.5142   0.5633 -0.9128 0.3613 -1.6183  0.5899\n",
      "country[T.Thailand]   0.4705   1.1608  0.4053 0.6852 -1.8046  2.7457\n",
      "country[T.UK]        -0.6469   0.4283 -1.5104 0.1309 -1.4863  0.1925\n",
      "tobacco_ever[T.Yes]   0.3991   0.2738  1.4576 0.1450 -0.1376  0.9357\n",
      "age_group             0.5797   0.1284  4.5162 0.0000  0.3281  0.8313\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.008893   0.055832  0.022283  4.783477e-16    1.334590e-13\n",
      "hypert[T.Yes]         0.852861   2.403785  1.431815  1.745049e-01    1.000000e+00\n",
      "sex[T.Male]           0.533732   1.545854  0.908335  7.230469e-01    1.000000e+00\n",
      "country[T.Brazil]     0.591980   2.616763  1.244617  5.638301e-01    1.000000e+00\n",
      "country[T.Canada]     0.136890   1.231699  0.410618  1.122552e-01    1.000000e+00\n",
      "country[T.Japan]      0.449584   3.323390  1.222352  6.940001e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.234544   5.436212  1.129172  8.795771e-01    1.000000e+00\n",
      "country[T.Poland]     0.096944   7.001131  0.823843  8.591269e-01    1.000000e+00\n",
      "country[T.Romania]    0.060813   1.161539  0.265776  7.824475e-02    1.000000e+00\n",
      "country[T.Russia]     0.129937   0.819381  0.326294  1.712537e-02    1.000000e+00\n",
      "country[T.Serbia]     0.198238   1.803780  0.597978  3.613443e-01    1.000000e+00\n",
      "country[T.Thailand]   0.164540  15.574837  1.600841  6.852214e-01    1.000000e+00\n",
      "country[T.UK]         0.226198   1.212304  0.523661  1.309287e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.871484   2.549013  1.490444  1.449631e-01    1.000000e+00\n",
      "age_group             1.388386   2.296376  1.785569  6.294638e-06    1.756204e-03\n",
      "Running logistic regression with parameter hypert, signature SBS12\n",
      "** Warning: Covariate country, sig SBS12, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS12: 912\n",
      "All counts for signature SBS12: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]          0.390973    1.898179    0.863327  6.723165e-01    1.000000e+00\n",
      "sex[T.Male]            0.469132    2.545688    1.078673  8.006087e-01    1.000000e+00\n",
      "country[T.Brazil]      0.498615    9.546131    2.273092  2.543519e-01    1.000000e+00\n",
      "country[T.Canada]      1.336619   18.845198    4.888666  1.728315e-02    1.000000e+00\n",
      "country[T.Japan]      45.403019  514.452515  137.524583  2.451110e-24    6.838598e-22\n",
      "country[T.Lithuania]   0.013510   18.494363    1.824505  6.047615e-01    1.000000e+00\n",
      "country[T.Poland]      0.018418   25.890402    2.497895  5.310440e-01    1.000000e+00\n",
      "country[T.Romania]     0.366055    9.990679    2.149381  3.392076e-01    1.000000e+00\n",
      "country[T.Russia]      0.122331    3.319849    0.716130  6.195871e-01    1.000000e+00\n",
      "country[T.Serbia]      0.137391    7.743566    1.392406  6.445816e-01    1.000000e+00\n",
      "country[T.Thailand]    0.029186   49.401205    4.105879  4.067793e-01    1.000000e+00\n",
      "country[T.UK]          0.219570    5.887215    1.278965  6.315054e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.315922    1.689517    0.738953  4.563388e-01    1.000000e+00\n",
      "age_group              0.894065    1.782485    1.255743  1.851142e-01    1.000000e+00\n",
      "Intercept              0.003002    0.047246    0.013238  1.387779e-15    3.871905e-13\n",
      "Running logistic regression with parameter hypert, signature SBS13\n",
      "** Warning: Covariate country, sig SBS13, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS13: 803\n",
      "All counts for signature SBS13: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.579153  1.200958  0.834611  3.274418e-01    1.000000e+00\n",
      "sex[T.Male]           1.101830  2.410256  1.619451  1.373100e-02    1.000000e+00\n",
      "country[T.Brazil]     0.423142  1.610071  0.848176  6.122274e-01    1.000000e+00\n",
      "country[T.Canada]     0.406985  1.714473  0.863849  6.835746e-01    1.000000e+00\n",
      "country[T.Japan]      0.116173  1.252076  0.444621  1.316909e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.369389  4.739425  1.497842  5.269298e-01    1.000000e+00\n",
      "country[T.Poland]     0.001376  1.389508  0.177484  1.161732e-01    1.000000e+00\n",
      "country[T.Romania]    0.039303  0.601723  0.195099  2.411181e-03    6.727195e-01\n",
      "country[T.Russia]     0.941889  2.419480  1.507323  8.646877e-02    1.000000e+00\n",
      "country[T.Serbia]     0.318763  1.589589  0.749560  4.581607e-01    1.000000e+00\n",
      "country[T.Thailand]   0.004156  5.120603  0.551767  6.519146e-01    1.000000e+00\n",
      "country[T.UK]         0.676008  2.083844  1.198467  5.150179e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.855187  1.796237  1.237570  2.554227e-01    1.000000e+00\n",
      "age_group             0.927965  1.288500  1.092804  2.855725e-01    1.000000e+00\n",
      "Intercept             0.066357  0.217055  0.121783  4.491577e-14    1.253150e-11\n",
      "Running logistic regression with parameter hypert, signature SBS18\n",
      "** Warning: Covariate country, sig SBS18, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS18: 886\n",
      "All counts for signature SBS18: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.375981   1.066139  0.638119  8.458743e-02    1.000000e+00\n",
      "sex[T.Male]           0.995690   2.968814  1.690838  5.042348e-02    1.000000e+00\n",
      "country[T.Brazil]     0.409123   2.196159  0.989973  8.166369e-01    1.000000e+00\n",
      "country[T.Canada]     0.221125   1.902554  0.722609  5.308957e-01    1.000000e+00\n",
      "country[T.Japan]      0.152946   2.607796  0.785577  6.600334e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.409474   8.113726  2.206230  3.036031e-01    1.000000e+00\n",
      "country[T.Poland]     0.111460   4.865564  1.067123  7.808722e-01    1.000000e+00\n",
      "country[T.Romania]    0.313271   2.300494  0.921234  7.752192e-01    1.000000e+00\n",
      "country[T.Russia]     0.291599   1.219542  0.607428  1.571245e-01    1.000000e+00\n",
      "country[T.Serbia]     0.299608   2.206571  0.882145  7.128032e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009536  12.246750  1.275742  7.703430e-01    1.000000e+00\n",
      "country[T.UK]         0.347656   1.837787  0.835385  6.076787e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.475921   1.303681  0.789400  3.401602e-01    1.000000e+00\n",
      "age_group             0.700903   1.088894  0.874499  2.236063e-01    1.000000e+00\n",
      "Intercept             0.058132   0.260652  0.126446  2.933855e-09    8.185455e-07\n",
      "Running logistic regression with parameter hypert, signature SBS21\n",
      "** Warning: Covariate country, sig SBS21, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS21: 952\n",
      "All counts for signature SBS21: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.157315      5.073421   0.945048  5.981412e-01        1.000000\n",
      "sex[T.Male]           0.171346      5.137913   0.873885  5.051534e-01        1.000000\n",
      "country[T.Brazil]     0.013868    479.075471   2.577633  4.493936e-01        1.000000\n",
      "country[T.Canada]     0.018215    634.133726   3.398239  5.159357e-01        1.000000\n",
      "country[T.Japan]      0.039400   1377.171809   7.365153  2.550876e-01        1.000000\n",
      "country[T.Lithuania]  0.091964   3258.543559  17.301848  1.676291e-01        1.000000\n",
      "country[T.Poland]     0.075073   2728.167687  14.320461  1.319199e-01        1.000000\n",
      "country[T.Romania]    1.718224   3007.429570  21.640913  1.279004e-02        1.000000\n",
      "country[T.Russia]     0.160576    459.519393   3.106520  3.227841e-01        1.000000\n",
      "country[T.Serbia]     0.017276    605.576825   3.234432  3.444960e-01        1.000000\n",
      "country[T.Thailand]   0.291503  12239.079858  59.412240  7.148600e-02        1.000000\n",
      "country[T.UK]         0.936056   1630.519495  11.750168  4.549382e-02        1.000000\n",
      "tobacco_ever[T.Yes]   0.259725      8.643851   1.396798  4.366069e-01        1.000000\n",
      "age_group             0.316875      1.522130   0.712380  2.988523e-01        1.000000\n",
      "Intercept             0.000026      0.056532   0.004081  5.879082e-08        0.000016\n",
      "Running logistic regression with parameter hypert, signature SBS22\n",
      "** Warning: Covariate country, sig SBS22, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS22: 887\n",
      "All counts for signature SBS22: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                            2.5%        97.5%          OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]           0.484140     2.120516    1.004989  7.555776e-01    1.000000e+00\n",
      "sex[T.Male]             0.286901     1.253016    0.604210  1.671823e-01    1.000000e+00\n",
      "country[T.Brazil]       1.157876    76.043583    7.240341  3.121315e-02    1.000000e+00\n",
      "country[T.Canada]       0.853824    73.909547    6.612807  6.829006e-02    1.000000e+00\n",
      "country[T.Japan]        0.014741    42.600923    2.185270  5.059261e-01    1.000000e+00\n",
      "country[T.Lithuania]    0.033084   100.106087    4.972149  3.185345e-01    1.000000e+00\n",
      "country[T.Poland]       0.061787   194.719797    9.389578  2.293502e-01    1.000000e+00\n",
      "country[T.Romania]    121.815093  4937.852030  521.678967  1.074176e-38    2.996951e-36\n",
      "country[T.Russia]       0.003724    10.439484    0.547267  6.090969e-01    1.000000e+00\n",
      "country[T.Serbia]      17.845649   723.430213   76.909013  1.087305e-12    3.033582e-10\n",
      "country[T.Thailand]    18.028005  2462.676707  165.399274  9.839088e-06    2.745106e-03\n",
      "country[T.UK]           0.177025    27.903821    2.222666  4.024269e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]     0.444846     2.043637    0.949140  7.571984e-01    1.000000e+00\n",
      "age_group               1.394877     2.777131    1.937844  4.506895e-05    1.257424e-02\n",
      "Intercept               0.000139     0.008564    0.001537  7.598901e-26    2.120093e-23\n",
      "Running logistic regression with parameter hypert, signature SBS44\n",
      "** Warning: Covariate country, sig SBS44, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS44: 951\n",
      "All counts for signature SBS44: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.116511      3.087693   0.666199  4.621377e-01    1.000000e+00\n",
      "sex[T.Male]           0.245400      6.070461   1.094424  5.074198e-01    1.000000e+00\n",
      "country[T.Brazil]     0.445610   1253.229260   8.518081  1.159418e-01    1.000000e+00\n",
      "country[T.Canada]     0.018132    628.989692   3.376752  5.352481e-01    1.000000e+00\n",
      "country[T.Japan]      0.036425   1271.138733   6.803513  2.616924e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.094784   3364.739432  17.847570  1.647832e-01    1.000000e+00\n",
      "country[T.Poland]     0.073756   2663.001055  14.024296  1.325993e-01    1.000000e+00\n",
      "country[T.Romania]    1.750987   3074.119041  22.099943  1.230866e-02    1.000000e+00\n",
      "country[T.Russia]     0.175185    499.163088   3.379691  3.014598e-01    1.000000e+00\n",
      "country[T.Serbia]     0.019130    670.504733   3.581786  3.406064e-01    1.000000e+00\n",
      "country[T.Thailand]   0.351691  14889.626651  71.822766  6.622778e-02    1.000000e+00\n",
      "country[T.UK]         0.921214   1610.633219  11.592923  4.434469e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.416597     10.960627   1.915738  2.912293e-01    1.000000e+00\n",
      "age_group             0.382764      1.564408   0.784761  3.730022e-01    1.000000e+00\n",
      "Intercept             0.000017      0.037822   0.002739  2.550342e-09    7.115453e-07\n",
      "Running logistic regression with parameter hypert, signature SBS1536A\n",
      "** Warning: Covariate country, sig SBS1536A, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536A: 479\n",
      "All counts for signature SBS1536A: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.987840  1.778126  1.325155  6.023258e-02    1.000000e+00\n",
      "sex[T.Male]           1.372609  2.530571  1.860026  5.800564e-05    1.618357e-02\n",
      "country[T.Brazil]     0.174885  0.498178  0.296713  3.788987e-06    1.057127e-03\n",
      "country[T.Canada]     0.154149  0.491794  0.276737  1.148192e-05    3.203456e-03\n",
      "country[T.Japan]      0.026247  0.155185  0.066676  3.664896e-11    1.022506e-08\n",
      "country[T.Lithuania]  0.345625  3.770071  1.054673  8.942310e-01    1.000000e+00\n",
      "country[T.Poland]     0.318315  3.673924  1.044694  9.149717e-01    1.000000e+00\n",
      "country[T.Romania]    0.207800  0.699404  0.382126  1.850190e-03    5.162029e-01\n",
      "country[T.Russia]     0.206948  0.469597  0.312800  1.523681e-08    4.251070e-06\n",
      "country[T.Serbia]     0.263201  0.850225  0.473979  1.232269e-02    1.000000e+00\n",
      "country[T.Thailand]   0.000196  0.252845  0.026303  5.253917e-04    1.465843e-01\n",
      "country[T.UK]         0.280245  0.743457  0.456965  1.616690e-03    4.510564e-01\n",
      "tobacco_ever[T.Yes]   0.790102  1.438083  1.065900  6.754233e-01    1.000000e+00\n",
      "age_group             1.705630  2.262546  1.959562  1.559664e-23    4.351463e-21\n",
      "Intercept             0.214782  0.548157  0.344649  5.856062e-06    1.633841e-03\n",
      "Running logistic regression with parameter hypert, signature SBS1536B\n",
      "Zero counts for signature SBS1536B: 479\n",
      "All counts for signature SBS1536B: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1261.6285 \n",
      "Link Function:         Logit             BIC:             -5234.0743\n",
      "Dependent Variable:    SBS1536B_bool     Log-Likelihood:  -615.81   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -663.34   \n",
      "No. Observations:      957               Deviance:        1231.6    \n",
      "Df Model:              14                Pearson chi2:    957.      \n",
      "Df Residuals:          942               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.1698   0.2276 -5.1409 0.0000 -1.6158 -0.7238\n",
      "hypert[T.Yes]         0.0274   0.1423  0.1928 0.8471 -0.2514  0.3063\n",
      "sex[T.Male]          -0.0817   0.1457 -0.5609 0.5749 -0.3672  0.2038\n",
      "country[T.Brazil]    -0.7349   0.2625 -2.7997 0.0051 -1.2495 -0.2204\n",
      "country[T.Canada]    -0.4923   0.2814 -1.7496 0.0802 -1.0437  0.0592\n",
      "country[T.Japan]     -0.4011   0.3738 -1.0729 0.2833 -1.1338  0.3316\n",
      "country[T.Lithuania] -0.2720   0.5456 -0.4985 0.6181 -1.3414  0.7974\n",
      "country[T.Poland]    -0.3268   0.6133 -0.5328 0.5942 -1.5289  0.8753\n",
      "country[T.Romania]    0.2517   0.2948  0.8537 0.3933 -0.3261  0.8295\n",
      "country[T.Russia]     0.4103   0.1968  2.0848 0.0371  0.0246  0.7960\n",
      "country[T.Serbia]     0.1761   0.2865  0.6148 0.5387 -0.3854  0.7376\n",
      "country[T.Thailand]  -0.7447   0.9376 -0.7943 0.4270 -2.5823  1.0929\n",
      "country[T.UK]         0.3083   0.2381  1.2948 0.1954 -0.1584  0.7750\n",
      "tobacco_ever[T.Yes]   0.3504   0.1460  2.4005 0.0164  0.0643  0.6365\n",
      "age_group             0.5104   0.0664  7.6851 0.0000  0.3802  0.6405\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.198728  0.484894  0.310422  2.734735e-07    7.629910e-05\n",
      "hypert[T.Yes]         0.777712  1.358331  1.027809  8.471105e-01    1.000000e+00\n",
      "sex[T.Male]           0.692664  1.226058  0.921546  5.748803e-01    1.000000e+00\n",
      "country[T.Brazil]     0.286661  0.802175  0.479534  5.115682e-03    1.000000e+00\n",
      "country[T.Canada]     0.352139  1.060964  0.611234  8.018139e-02    1.000000e+00\n",
      "country[T.Japan]      0.321803  1.393235  0.669588  2.833214e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.261471  2.219755  0.761841  6.181046e-01    1.000000e+00\n",
      "country[T.Poland]     0.216784  2.399568  0.721240  5.941611e-01    1.000000e+00\n",
      "country[T.Romania]    0.721701  2.292263  1.286207  3.932580e-01    1.000000e+00\n",
      "country[T.Russia]     1.024873  2.216685  1.507256  3.708727e-02    1.000000e+00\n",
      "country[T.Serbia]     0.680195  2.090989  1.192594  5.386914e-01    1.000000e+00\n",
      "country[T.Thailand]   0.075598  2.983009  0.474879  4.270375e-01    1.000000e+00\n",
      "country[T.UK]         0.853517  2.170516  1.361092  1.954055e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   1.066416  1.889865  1.419642  1.637340e-02    1.000000e+00\n",
      "age_group             1.462579  1.897468  1.665892  1.528978e-14    4.265850e-12\n",
      "Running logistic regression with parameter hypert, signature SBS1536F\n",
      "** Warning: Covariate country, sig SBS1536F, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536F: 841\n",
      "All counts for signature SBS1536F: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.512037   1.159010  0.771765  2.109371e-01    1.000000e+00\n",
      "sex[T.Male]           0.637255   1.459511  0.961247  8.388481e-01    1.000000e+00\n",
      "country[T.Brazil]     0.741930   3.119579  1.549345  2.346661e-01    1.000000e+00\n",
      "country[T.Canada]     1.506006   6.104532  3.057192  2.309313e-03    6.442982e-01\n",
      "country[T.Japan]      0.601561   4.638256  1.802449  2.690516e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.674397   8.991829  2.774716  1.417178e-01    1.000000e+00\n",
      "country[T.Poland]     0.002750   2.861045  0.356401  4.013287e-01    1.000000e+00\n",
      "country[T.Romania]    0.073302   1.180019  0.369528  9.822203e-02    1.000000e+00\n",
      "country[T.Russia]     0.908183   2.852110  1.600458  1.031811e-01    1.000000e+00\n",
      "country[T.Serbia]     0.290138   2.092170  0.846988  7.177839e-01    1.000000e+00\n",
      "country[T.Thailand]   0.281823  17.047938  2.931413  3.100752e-01    1.000000e+00\n",
      "country[T.UK]         0.767418   3.022098  1.542628  2.158750e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.497802   1.150001  0.758188  1.922142e-01    1.000000e+00\n",
      "age_group             0.841666   1.207150  1.007661  9.020333e-01    1.000000e+00\n",
      "Intercept             0.070221   0.257549  0.137120  7.017046e-11    1.957756e-08\n",
      "Running logistic regression with parameter hypert, signature SBS1536I\n",
      "** Warning: Covariate country, sig SBS1536I, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536I: 863\n",
      "All counts for signature SBS1536I: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]          0.544292    2.113196    1.067279  5.544334e-01    1.000000e+00\n",
      "sex[T.Male]            0.454247    1.758710    0.893493  5.251103e-01    1.000000e+00\n",
      "country[T.Brazil]      0.448381    7.774603    1.994140  2.606574e-01    1.000000e+00\n",
      "country[T.Canada]      0.002354    2.839652    0.311436  3.543772e-01    1.000000e+00\n",
      "country[T.Japan]       0.149009    8.197382    1.514512  4.590471e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.452563   27.249843    4.743291  1.260239e-01    1.000000e+00\n",
      "country[T.Poland]      0.017892   25.420270    2.440508  4.278454e-01    1.000000e+00\n",
      "country[T.Romania]    69.952700  641.074675  193.552315  3.119394e-38    8.703110e-36\n",
      "country[T.Russia]      0.050057    2.570702    0.496490  3.236778e-01    1.000000e+00\n",
      "country[T.Serbia]     29.211133  251.660667   78.593998  1.656066e-24    4.620424e-22\n",
      "country[T.Thailand]    7.877818  400.997282   52.353432  9.484054e-05    2.646051e-02\n",
      "country[T.UK]          0.001398    1.679560    0.184870  1.191130e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.564460    2.249052    1.120545  5.218629e-01    1.000000e+00\n",
      "age_group              1.542862    2.957997    2.106594  8.113677e-07    2.263716e-04\n",
      "Intercept              0.000742    0.012222    0.003319  3.244675e-28    9.052645e-26\n",
      "Running logistic regression with parameter hypert, signature DBS2\n",
      "Zero counts for signature DBS2: 558\n",
      "All counts for signature DBS2: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1264.9027 \n",
      "Link Function:         Logit             BIC:             -5230.8001\n",
      "Dependent Variable:    DBS2_bool         Log-Likelihood:  -617.45   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -650.07   \n",
      "No. Observations:      957               Deviance:        1234.9    \n",
      "Df Model:              14                Pearson chi2:    956.      \n",
      "Df Residuals:          942               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.2572   0.2276 -5.5249 0.0000 -1.7032 -0.8112\n",
      "hypert[T.Yes]         0.0191   0.1418  0.1346 0.8929 -0.2588  0.2970\n",
      "sex[T.Male]           0.3173   0.1460  2.1732 0.0298  0.0311  0.6036\n",
      "country[T.Brazil]    -0.2615   0.2533 -1.0323 0.3019 -0.7580  0.2350\n",
      "country[T.Canada]    -0.4190   0.2789 -1.5025 0.1330 -0.9656  0.1276\n",
      "country[T.Japan]     -0.9739   0.3951 -2.4651 0.0137 -1.7483 -0.1996\n",
      "country[T.Lithuania] -0.2146   0.5532 -0.3880 0.6980 -1.2989  0.8697\n",
      "country[T.Poland]    -0.3311   0.6013 -0.5507 0.5818 -1.5096  0.8474\n",
      "country[T.Romania]    0.3958   0.2908  1.3615 0.1734 -0.1740  0.9657\n",
      "country[T.Russia]    -0.2777   0.1987 -1.3971 0.1624 -0.6672  0.1119\n",
      "country[T.Serbia]     0.3146   0.2816  1.1172 0.2639 -0.2373  0.8666\n",
      "country[T.Thailand]  -0.0507   0.9400 -0.0539 0.9570 -1.8931  1.7917\n",
      "country[T.UK]        -0.2189   0.2338 -0.9364 0.3491 -0.6772  0.2393\n",
      "tobacco_ever[T.Yes]   0.7493   0.1453  5.1583 0.0000  0.4646  1.0340\n",
      "age_group             0.2385   0.0640  3.7283 0.0002  0.1131  0.3638\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.182094  0.444316  0.284442  3.297479e-08        0.000009\n",
      "hypert[T.Yes]         0.771987  1.345763  1.019270  8.929064e-01        1.000000\n",
      "sex[T.Male]           1.031622  1.828619  1.373478  2.976884e-02        1.000000\n",
      "country[T.Brazil]     0.468620  1.264903  0.769908  3.019473e-01        1.000000\n",
      "country[T.Canada]     0.380747  1.136063  0.657687  1.329600e-01        1.000000\n",
      "country[T.Japan]      0.174075  0.819092  0.377602  1.369885e-02        1.000000\n",
      "country[T.Lithuania]  0.272827  2.386091  0.806839  6.980401e-01        1.000000\n",
      "country[T.Poland]     0.220990  2.333480  0.718105  5.818271e-01        1.000000\n",
      "country[T.Romania]    0.840281  2.626652  1.485640  1.733709e-01        1.000000\n",
      "country[T.Russia]     0.513139  1.118357  0.757544  1.623748e-01        1.000000\n",
      "country[T.Serbia]     0.788740  2.378735  1.369745  2.638921e-01        1.000000\n",
      "country[T.Thailand]   0.150598  5.999841  0.950561  9.569854e-01        1.000000\n",
      "country[T.UK]         0.508019  1.270410  0.803363  3.490779e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.591377  2.812348  2.115539  2.492048e-07        0.000070\n",
      "age_group             1.119753  1.438849  1.269313  1.928095e-04        0.053794\n",
      "Running logistic regression with parameter hypert, signature DBS4\n",
      "** Warning: Covariate country, sig DBS4, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS4: 870\n",
      "All counts for signature DBS4: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.451960  1.139025  0.719301  1.572964e-01    1.000000e+00\n",
      "sex[T.Male]           0.628375  1.624891  1.005281  8.770253e-01    1.000000e+00\n",
      "country[T.Brazil]     0.307385  1.548201  0.725137  4.064394e-01    1.000000e+00\n",
      "country[T.Canada]     0.325480  1.816328  0.814607  6.278842e-01    1.000000e+00\n",
      "country[T.Japan]      0.090676  1.498051  0.461454  2.130278e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.797997  8.501167  2.815725  9.881877e-02    1.000000e+00\n",
      "country[T.Poland]     0.099492  4.169668  0.940571  8.419900e-01    1.000000e+00\n",
      "country[T.Romania]    0.151543  1.254361  0.488917  1.419124e-01    1.000000e+00\n",
      "country[T.Russia]     0.422978  1.443665  0.789686  4.342760e-01    1.000000e+00\n",
      "country[T.Serbia]     0.060577  0.950851  0.302965  3.883756e-02    1.000000e+00\n",
      "country[T.Thailand]   0.003961  4.996399  0.528683  6.255631e-01    1.000000e+00\n",
      "country[T.UK]         0.225569  1.129472  0.531166  9.942341e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.623840  1.612255  1.002846  8.823733e-01    1.000000e+00\n",
      "age_group             1.092286  1.666161  1.345511  5.052477e-03    1.000000e+00\n",
      "Intercept             0.041207  0.178094  0.087800  3.933275e-13    1.097384e-10\n",
      "Running logistic regression with parameter hypert, signature DBS9\n",
      "** Warning: Covariate country, sig DBS9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS9: 922\n",
      "All counts for signature DBS9: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.340144   1.393592  0.694089  2.944926e-01    1.000000e+00\n",
      "sex[T.Male]           0.723886   3.251761  1.493875  2.722027e-01    1.000000e+00\n",
      "country[T.Brazil]     0.334158   3.314028  1.144846  7.224715e-01    1.000000e+00\n",
      "country[T.Canada]     0.417068   4.222073  1.441502  5.309942e-01    1.000000e+00\n",
      "country[T.Japan]      0.002113   2.212258  0.274126  2.628156e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.253294  11.592956  2.452330  3.471279e-01    1.000000e+00\n",
      "country[T.Poland]     0.006279   7.041053  0.823277  7.384251e-01    1.000000e+00\n",
      "country[T.Romania]    0.001237   1.262640  0.159822  8.927190e-02    1.000000e+00\n",
      "country[T.Russia]     0.482188   2.866963  1.180484  6.590230e-01    1.000000e+00\n",
      "country[T.Serbia]     0.321626   4.116182  1.301232  6.143425e-01    1.000000e+00\n",
      "country[T.Thailand]   0.016009  22.034257  2.170218  5.898462e-01    1.000000e+00\n",
      "country[T.UK]         0.089781   1.628692  0.468768  2.305250e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.478771   1.980098  0.973260  7.968024e-01    1.000000e+00\n",
      "age_group             0.841836   1.575748  1.149015  3.682114e-01    1.000000e+00\n",
      "Intercept             0.010069   0.094881  0.032964  5.381959e-13    1.501567e-10\n",
      "Running logistic regression with parameter hypert, signature DBS78C\n",
      "** Warning: Covariate country, sig DBS78C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78C: 864\n",
      "All counts for signature DBS78C: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.534818  1.301880  0.835260  4.225362e-01    1.000000e+00\n",
      "sex[T.Male]           0.929679  2.425670  1.485681  9.803954e-02    1.000000e+00\n",
      "country[T.Brazil]     0.474112  2.333711  1.092233  7.985445e-01    1.000000e+00\n",
      "country[T.Canada]     0.947316  4.081598  2.001358  6.819723e-02    1.000000e+00\n",
      "country[T.Japan]      0.111603  1.882759  0.571716  3.823788e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.108728  4.465959  1.022359  8.757573e-01    1.000000e+00\n",
      "country[T.Poland]     0.002758  2.861634  0.357212  4.013193e-01    1.000000e+00\n",
      "country[T.Romania]    0.288540  2.085033  0.843531  7.114220e-01    1.000000e+00\n",
      "country[T.Russia]     0.478910  1.777460  0.931426  8.079650e-01    1.000000e+00\n",
      "country[T.Serbia]     0.405029  2.586349  1.092928  8.117085e-01    1.000000e+00\n",
      "country[T.Thailand]   0.007081  8.897992  0.943459  8.904288e-01    1.000000e+00\n",
      "country[T.UK]         0.678358  2.656124  1.361355  3.675449e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.761062  1.896083  1.197922  4.305316e-01    1.000000e+00\n",
      "age_group             0.989098  1.477073  1.206633  6.377805e-02    1.000000e+00\n",
      "Intercept             0.025934  0.115219  0.056110  4.876080e-18    1.360426e-15\n",
      "Running logistic regression with parameter hypert, signature DBS78D\n",
      "** Warning: Covariate country, sig DBS78D, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78D: 900\n",
      "All counts for signature DBS78D: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]          0.533378    1.927666   1.010128  7.625365e-01    1.000000e+00\n",
      "sex[T.Male]            0.412545    1.492767   0.784035  4.286907e-01    1.000000e+00\n",
      "country[T.Brazil]      0.080206    4.063302   0.792004  6.537972e-01    1.000000e+00\n",
      "country[T.Canada]      0.308423    7.567246   1.759257  4.785626e-01    1.000000e+00\n",
      "country[T.Japan]       0.186698   10.008509   1.879605  4.649892e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.427724   24.886796   4.427384  1.591695e-01    1.000000e+00\n",
      "country[T.Poland]      0.741903   45.383397   7.789487  7.289799e-02    1.000000e+00\n",
      "country[T.Romania]    16.239403  122.658856  40.974269  6.950365e-19    1.939152e-16\n",
      "country[T.Russia]      0.308584    4.295829   1.180704  6.746203e-01    1.000000e+00\n",
      "country[T.Serbia]      3.876615   34.875865  10.973089  5.944938e-06    1.658638e-03\n",
      "country[T.Thailand]    0.019053   30.572038   2.656666  5.090818e-01    1.000000e+00\n",
      "country[T.UK]          0.184090    4.448511   1.043943  6.863756e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.396618    1.515125   0.778175  4.297465e-01    1.000000e+00\n",
      "age_group              1.155942    2.067266   1.534298  2.691691e-03    7.509817e-01\n",
      "Intercept              0.002712    0.033013   0.010344  2.820227e-21    7.868434e-19\n",
      "Running logistic regression with parameter hypert, signature ID1\n",
      "Zero counts for signature ID1: 479\n",
      "All counts for signature ID1: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1308.5092 \n",
      "Link Function:         Logit             BIC:             -5187.1936\n",
      "Dependent Variable:    ID1_bool          Log-Likelihood:  -639.25   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -663.34   \n",
      "No. Observations:      957               Deviance:        1278.5    \n",
      "Df Model:              14                Pearson chi2:    959.      \n",
      "Df Residuals:          942               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.4156   0.2155 -1.9283 0.0538 -0.8380  0.0068\n",
      "hypert[T.Yes]        -0.1770   0.1391 -1.2727 0.2031 -0.4497  0.0956\n",
      "sex[T.Male]           0.1570   0.1422  1.1037 0.2697 -0.1218  0.4358\n",
      "country[T.Brazil]     0.1188   0.2432  0.4885 0.6252 -0.3579  0.5956\n",
      "country[T.Canada]     0.4268   0.2768  1.5419 0.1231 -0.1157  0.9694\n",
      "country[T.Japan]      0.0249   0.3640  0.0684 0.9455 -0.6886  0.7384\n",
      "country[T.Lithuania] -0.7435   0.5671 -1.3110 0.1899 -1.8549  0.3680\n",
      "country[T.Poland]    -1.5845   0.7837 -2.0219 0.0432 -3.1204 -0.0485\n",
      "country[T.Romania]   -0.4042   0.2896 -1.3959 0.1627 -0.9718  0.1633\n",
      "country[T.Russia]     0.2420   0.1916  1.2628 0.2066 -0.1336  0.6175\n",
      "country[T.Serbia]    -0.3312   0.2840 -1.1662 0.2435 -0.8878  0.2254\n",
      "country[T.Thailand]   1.1723   1.1345  1.0334 0.3014 -1.0512  3.3959\n",
      "country[T.UK]        -0.2228   0.2297 -0.9702 0.3319 -0.6730  0.2273\n",
      "tobacco_ever[T.Yes]  -0.2975   0.1414 -2.1042 0.0354 -0.5746 -0.0204\n",
      "age_group             0.2813   0.0623  4.5138 0.0000  0.1592  0.4035\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "Intercept             0.432584   1.006843  0.659958  0.053816        1.000000\n",
      "hypert[T.Yes]         0.637846   1.100324  0.837757  0.203139        1.000000\n",
      "sex[T.Male]           0.885323   1.546169  1.169982  0.269738        1.000000\n",
      "country[T.Brazil]     0.699127   1.814068  1.126172  0.625196        1.000000\n",
      "country[T.Canada]     0.890722   2.636388  1.532413  0.123091        1.000000\n",
      "country[T.Japan]      0.502265   2.092602  1.025202  0.945491        1.000000\n",
      "country[T.Lithuania]  0.156464   1.444864  0.475467  0.189855        1.000000\n",
      "country[T.Poland]     0.044140   0.952638  0.205059  0.043189        1.000000\n",
      "country[T.Romania]    0.378402   1.177429  0.667489  0.162736        1.000000\n",
      "country[T.Russia]     0.874964   1.854285  1.273747  0.206649        1.000000\n",
      "country[T.Serbia]     0.411568   1.252844  0.718074  0.243533        1.000000\n",
      "country[T.Thailand]   0.349508  29.842276  3.229569  0.301434        1.000000\n",
      "country[T.UK]         0.510175   1.255228  0.800241  0.331927        1.000000\n",
      "tobacco_ever[T.Yes]   0.562948   0.979810  0.742686  0.035358        1.000000\n",
      "age_group             1.172541   1.497038  1.324892  0.000006        0.001776\n",
      "Running logistic regression with parameter hypert, signature ID2\n",
      "** Warning: Covariate country, sig ID2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID2: 941\n",
      "All counts for signature ID2: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.344468   2.663669  0.969396  5.841158e-01        1.000000\n",
      "sex[T.Male]           0.370272   3.006585  1.019842  5.749845e-01        1.000000\n",
      "country[T.Brazil]     0.001743   2.069669  0.229936  1.778102e-01        1.000000\n",
      "country[T.Canada]     0.002600   3.124568  0.343725  3.960682e-01        1.000000\n",
      "country[T.Japan]      0.635880  16.329587  3.692451  1.023682e-01        1.000000\n",
      "country[T.Lithuania]  0.011317  14.462530  1.512936  4.952134e-01        1.000000\n",
      "country[T.Poland]     0.012023  15.937435  1.618205  4.635018e-01        1.000000\n",
      "country[T.Romania]    0.325995   7.907243  1.851275  3.357733e-01        1.000000\n",
      "country[T.Russia]     0.225721   3.174368  0.867738  5.367298e-01        1.000000\n",
      "country[T.Serbia]     0.002411   2.901876  0.318850  2.667240e-01        1.000000\n",
      "country[T.Thailand]   0.029776  48.565810  4.162275  3.106129e-01        1.000000\n",
      "country[T.UK]         0.349897   5.877312  1.534002  3.813103e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.254406   2.077325  0.737402  4.104196e-01        1.000000\n",
      "age_group             0.551967   1.346359  0.863073  3.928236e-01        1.000000\n",
      "Intercept             0.007870   0.135532  0.036794  1.523464e-08        0.000004\n",
      "Running logistic regression with parameter hypert, signature ID3\n",
      "** Warning: Covariate country, sig ID3, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID3: 916\n",
      "All counts for signature ID3: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.324860   1.221009  0.636882  1.657930e-01    1.000000e+00\n",
      "sex[T.Male]           0.448552   1.729860  0.870731  6.119404e-01    1.000000e+00\n",
      "country[T.Brazil]     0.249976   3.317935  1.021621  7.345374e-01    1.000000e+00\n",
      "country[T.Canada]     0.163434   3.192859  0.873256  8.345682e-01    1.000000e+00\n",
      "country[T.Japan]      0.307545   6.302977  1.673182  4.604768e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.006562   7.457146  0.862205  7.089583e-01    1.000000e+00\n",
      "country[T.Poland]     0.283121  14.084970  2.813031  2.826634e-01    1.000000e+00\n",
      "country[T.Romania]    2.251110  14.361700  5.620322  2.693091e-04    7.513724e-02\n",
      "country[T.Russia]     0.308629   2.529025  0.909843  7.154771e-01    1.000000e+00\n",
      "country[T.Serbia]     0.069192   2.951970  0.655981  5.624424e-01    1.000000e+00\n",
      "country[T.Thailand]   0.837466  59.217785  9.169128  6.075419e-02    1.000000e+00\n",
      "country[T.UK]         0.375522   3.419001  1.189116  6.297848e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.896849   3.567434  1.765320  9.559232e-02    1.000000e+00\n",
      "age_group             0.878529   1.577331  1.173260  2.640727e-01    1.000000e+00\n",
      "Intercept             0.008949   0.076871  0.027831  1.955355e-15    5.455439e-13\n",
      "Running logistic regression with parameter hypert, signature ID5\n",
      "Zero counts for signature ID5: 479\n",
      "All counts for signature ID5: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1142.1444 \n",
      "Link Function:         Logit             BIC:             -5353.5584\n",
      "Dependent Variable:    ID5_bool          Log-Likelihood:  -556.07   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -663.34   \n",
      "No. Observations:      957               Deviance:        1112.1    \n",
      "Df Model:              14                Pearson chi2:    964.      \n",
      "Df Residuals:          942               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.5399   0.2460 -6.2600 0.0000 -2.0221 -1.0578\n",
      "hypert[T.Yes]         0.2802   0.1509  1.8572 0.0633 -0.0155  0.5760\n",
      "sex[T.Male]           0.5330   0.1575  3.3844 0.0007  0.2243  0.8417\n",
      "country[T.Brazil]    -0.9962   0.2708 -3.6793 0.0002 -1.5269 -0.4655\n",
      "country[T.Canada]    -0.8118   0.2971 -2.7321 0.0063 -1.3942 -0.2294\n",
      "country[T.Japan]     -2.8022   0.5009 -5.5938 0.0000 -3.7840 -1.8203\n",
      "country[T.Lithuania] -0.1713   0.5743 -0.2982 0.7656 -1.2969  0.9544\n",
      "country[T.Poland]    -0.9702   0.6713 -1.4452 0.1484 -2.2859  0.3456\n",
      "country[T.Romania]    0.3644   0.3294  1.1062 0.2687 -0.2813  1.0100\n",
      "country[T.Russia]    -0.4577   0.2066 -2.2151 0.0268 -0.8627 -0.0527\n",
      "country[T.Serbia]    -0.1130   0.3040 -0.3717 0.7101 -0.7089  0.4828\n",
      "country[T.Thailand]  -2.3632   1.1612 -2.0352 0.0418 -4.6390 -0.0873\n",
      "country[T.UK]        -0.3394   0.2488 -1.3644 0.1725 -0.8270  0.1482\n",
      "tobacco_ever[T.Yes]  -0.1037   0.1544 -0.6717 0.5017 -0.4063  0.1989\n",
      "age_group             0.7803   0.0747 10.4493 0.0000  0.6339  0.9266\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.132378  0.347219  0.214392  3.850241e-10    1.074217e-07\n",
      "hypert[T.Yes]         0.984621  1.778833  1.323433  6.327653e-02    1.000000e+00\n",
      "sex[T.Male]           1.251507  2.320343  1.704091  7.132154e-04    1.989871e-01\n",
      "country[T.Brazil]     0.217219  0.627812  0.369286  2.338833e-04    6.525343e-02\n",
      "country[T.Canada]     0.248025  0.794987  0.444046  6.293691e-03    1.000000e+00\n",
      "country[T.Japan]      0.022731  0.161970  0.060678  2.221986e-08    6.199342e-06\n",
      "country[T.Lithuania]  0.273367  2.597196  0.842607  7.655690e-01    1.000000e+00\n",
      "country[T.Poland]     0.101682  1.412812  0.379022  1.484083e-01    1.000000e+00\n",
      "country[T.Romania]    0.754832  2.745714  1.439637  2.686575e-01    1.000000e+00\n",
      "country[T.Russia]     0.422036  0.948656  0.632745  2.675600e-02    1.000000e+00\n",
      "country[T.Serbia]     0.492210  1.620665  0.893145  7.100983e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009667  0.916382  0.094123  4.183412e-02    1.000000e+00\n",
      "country[T.UK]         0.437352  1.159714  0.712182  1.724552e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.666125  1.220026  0.901493  5.017459e-01    1.000000e+00\n",
      "age_group             1.884987  2.525987  2.182076  1.475884e-25    4.117715e-23\n",
      "Running logistic regression with parameter hypert, signature ID8\n",
      "Zero counts for signature ID8: 260\n",
      "All counts for signature ID8: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1044.7174 \n",
      "Link Function:         Logit             BIC:             -5450.9854\n",
      "Dependent Variable:    ID8_bool          Log-Likelihood:  -507.36   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -559.77   \n",
      "No. Observations:      957               Deviance:        1014.7    \n",
      "Df Model:              14                Pearson chi2:    949.      \n",
      "Df Residuals:          942               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.0489   0.2439 -0.2005 0.8411 -0.5268  0.4291\n",
      "hypert[T.Yes]         0.5307   0.1628  3.2599 0.0011  0.2116  0.8498\n",
      "sex[T.Male]           0.4553   0.1630  2.7925 0.0052  0.1357  0.7748\n",
      "country[T.Brazil]    -0.8177   0.2763 -2.9591 0.0031 -1.3593 -0.2761\n",
      "country[T.Canada]    -0.3200   0.3337 -0.9590 0.3376 -0.9741  0.3340\n",
      "country[T.Japan]     -1.5317   0.4004 -3.8252 0.0001 -2.3166 -0.7469\n",
      "country[T.Lithuania]  0.3701   0.7927  0.4669 0.6406 -1.1836  1.9238\n",
      "country[T.Poland]    -0.1990   0.6532 -0.3047 0.7606 -1.4792  1.0811\n",
      "country[T.Romania]   -0.6449   0.3286 -1.9625 0.0497 -1.2890 -0.0008\n",
      "country[T.Russia]    -0.1188   0.2293 -0.5181 0.6044 -0.5682  0.3306\n",
      "country[T.Serbia]    -0.7934   0.3112 -2.5490 0.0108 -1.4034 -0.1833\n",
      "country[T.Thailand]  -1.3158   0.9475 -1.3887 0.1649 -3.1730  0.5413\n",
      "country[T.UK]        -0.2545   0.2832 -0.8986 0.3688 -0.8095  0.3006\n",
      "tobacco_ever[T.Yes]  -0.0638   0.1630 -0.3914 0.6955 -0.3832  0.2556\n",
      "age_group             0.4977   0.0731  6.8083 0.0000  0.3544  0.6410\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.590474  1.535802  0.952287  8.411012e-01    1.000000e+00\n",
      "hypert[T.Yes]         1.235678  2.339091  1.700107  1.114584e-03    3.109689e-01\n",
      "sex[T.Male]           1.145368  2.170136  1.576580  5.230397e-03    1.000000e+00\n",
      "country[T.Brazil]     0.256829  0.758740  0.441437  3.085731e-03    8.609191e-01\n",
      "country[T.Canada]     0.377525  1.396604  0.726122  3.375608e-01    1.000000e+00\n",
      "country[T.Japan]      0.098612  0.473836  0.216162  1.306755e-04    3.645848e-02\n",
      "country[T.Lithuania]  0.306172  6.846783  1.447858  6.406014e-01    1.000000e+00\n",
      "country[T.Poland]     0.227821  2.948011  0.819523  7.605760e-01    1.000000e+00\n",
      "country[T.Romania]    0.275558  0.999162  0.524716  4.970264e-02    1.000000e+00\n",
      "country[T.Russia]     0.566528  1.391836  0.887983  6.043871e-01    1.000000e+00\n",
      "country[T.Serbia]     0.245761  0.832494  0.452322  1.080387e-02    1.000000e+00\n",
      "country[T.Thailand]   0.041879  1.718247  0.268250  1.649272e-01    1.000000e+00\n",
      "country[T.UK]         0.445067  1.350614  0.775315  3.688469e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.681669  1.291268  0.938199  6.954750e-01    1.000000e+00\n",
      "age_group             1.425393  1.898420  1.644990  9.876174e-12    2.755453e-09\n",
      "Running logistic regression with parameter hypert, signature ID9\n",
      "** Warning: Covariate country, sig ID9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID9: 953\n",
      "All counts for signature ID9: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%       97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.259823   16.348886  1.583421  5.286405e-01        1.000000\n",
      "sex[T.Male]           0.189744   12.433239  1.216556  6.123651e-01        1.000000\n",
      "country[T.Brazil]     0.004139    7.184747  0.573906  5.155700e-01        1.000000\n",
      "country[T.Canada]     0.156510   13.764341  1.760469  5.418835e-01        1.000000\n",
      "country[T.Japan]      0.007376   14.278943  1.053408  5.169535e-01        1.000000\n",
      "country[T.Lithuania]  0.022560   43.040797  3.201336  3.486212e-01        1.000000\n",
      "country[T.Poland]     0.026035   51.493295  3.713381  3.210348e-01        1.000000\n",
      "country[T.Romania]    0.006614   11.645003  0.920099  6.276952e-01        1.000000\n",
      "country[T.Russia]     0.002156    4.157059  0.305928  3.401586e-01        1.000000\n",
      "country[T.Serbia]     0.214395   20.686670  2.502123  3.033787e-01        1.000000\n",
      "country[T.Thailand]   0.063444  159.688967  9.437387  1.918114e-01        1.000000\n",
      "country[T.UK]         0.002883    5.087639  0.401701  3.523186e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.285961   21.222504  1.914687  3.902459e-01        1.000000\n",
      "age_group             0.499965    3.136700  1.188950  6.220001e-01        1.000000\n",
      "Intercept             0.000083    0.051942  0.003748  5.392327e-08        0.000015\n",
      "Running logistic regression with parameter hypert, signature ID11\n",
      "** Warning: Covariate country, sig ID11, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID11: 949\n",
      "All counts for signature ID11: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.658155  15.120938  2.683987  1.510457e-01        1.000000\n",
      "sex[T.Male]           0.150155   2.779587  0.684481  5.433864e-01        1.000000\n",
      "country[T.Brazil]     0.002743   4.002907  0.372755  3.665418e-01        1.000000\n",
      "country[T.Canada]     0.198303  14.122782  2.140932  4.163025e-01        1.000000\n",
      "country[T.Japan]      0.009761  15.231072  1.345332  4.986730e-01        1.000000\n",
      "country[T.Lithuania]  0.010671  17.583510  1.488882  4.751632e-01        1.000000\n",
      "country[T.Poland]     0.044450  94.823214  6.452182  2.835198e-01        1.000000\n",
      "country[T.Romania]    0.004281   6.218816  0.580985  5.191071e-01        1.000000\n",
      "country[T.Russia]     0.219118   5.662659  1.107325  6.179152e-01        1.000000\n",
      "country[T.Serbia]     0.003477   5.101389  0.473185  3.940057e-01        1.000000\n",
      "country[T.Thailand]   0.022782  47.415286  3.329163  3.889092e-01        1.000000\n",
      "country[T.UK]         0.128097   8.699207  1.359057  4.893828e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.020651   1.077717  0.204337  5.501278e-02        1.000000\n",
      "age_group             0.596741   2.219053  1.121990  5.790671e-01        1.000000\n",
      "Intercept             0.000973   0.090460  0.012828  9.360229e-08        0.000026\n",
      "Running logistic regression with parameter hypert, signature ID12\n",
      "** Warning: Covariate country, sig ID12, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID12: 950\n",
      "All counts for signature ID12: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR   p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.040691      1.974169   0.390606  0.222247        1.000000\n",
      "sex[T.Male]           0.130407      2.453421   0.586311  0.320186        1.000000\n",
      "country[T.Brazil]     1.390141   2068.299695  15.170265  0.017932        1.000000\n",
      "country[T.Canada]     0.025142    891.417176   4.732349  0.431278        1.000000\n",
      "country[T.Japan]      0.049125   1753.190017   9.275718  0.213038        1.000000\n",
      "country[T.Lithuania]  0.121148   4436.211782  23.136863  0.131587        1.000000\n",
      "country[T.Poland]     0.055041   2057.154656  10.655354  0.135478        1.000000\n",
      "country[T.Romania]    0.019544    683.757312   3.656185  0.300984        1.000000\n",
      "country[T.Russia]     0.522976    752.834051   5.556306  0.129510        1.000000\n",
      "country[T.Serbia]     0.015471    548.676369   2.914665  0.333829        1.000000\n",
      "country[T.Thailand]   0.311657  15179.918090  68.345235  0.064240        1.000000\n",
      "country[T.UK]         0.425849   1191.296561   8.108075  0.115946        1.000000\n",
      "tobacco_ever[T.Yes]   0.125046      2.973110   0.693373  0.424459        1.000000\n",
      "age_group             0.235379      1.065629   0.535266  0.060027        1.000000\n",
      "Intercept             0.000101      0.142541   0.014143  0.000007        0.001849\n",
      "Running logistic regression with parameter hypert, signature ID83C\n",
      "** Warning: Covariate country, sig ID83C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID83C: 940\n",
      "All counts for signature ID83C: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%         97.5%          OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]          0.531854      4.648215    1.539468  3.773121e-01    1.000000e+00\n",
      "sex[T.Male]            0.418701      4.059238    1.261520  5.887927e-01    1.000000e+00\n",
      "country[T.Brazil]      0.016162    554.366287    2.993128  4.382723e-01    1.000000e+00\n",
      "country[T.Canada]      0.562918   1585.120055   10.771740  1.082844e-01    1.000000e+00\n",
      "country[T.Japan]       0.032232   1134.337488    6.047332  2.821816e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.080148   2850.412982   15.115189  1.627547e-01    1.000000e+00\n",
      "country[T.Poland]      0.142932   5193.487578   27.220995  1.348163e-01    1.000000e+00\n",
      "country[T.Romania]    17.934030  18221.483687  141.054060  1.217554e-10    3.396974e-08\n",
      "country[T.Russia]      0.008170    283.861899    1.522697  6.216255e-01    1.000000e+00\n",
      "country[T.Serbia]      2.939099   4236.080014   31.243298  2.502330e-03    6.981502e-01\n",
      "country[T.Thailand]    0.183101   7188.832095   36.291375  1.072594e-01    1.000000e+00\n",
      "country[T.UK]          0.012699    437.346820    2.356660  4.526192e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.230904      2.298760    0.746246  5.067653e-01    1.000000e+00\n",
      "age_group              0.905623      2.453254    1.454768  1.143466e-01    1.000000e+00\n",
      "Intercept              0.000004      0.008530    0.000672  1.049398e-17    2.927819e-15\n",
      "Running logistic regression with parameter hypert, signature SBS_burden\n",
      "** Warning: Covariate country, sig SBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS_burden: 479\n",
      "All counts for signature SBS_burden: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.750822   1.390390  1.022690  8.655163e-01    1.000000e+00\n",
      "sex[T.Male]           1.416144   2.697584  1.949547  3.828437e-05    1.068134e-02\n",
      "country[T.Brazil]     0.321616   0.951385  0.555096  3.203685e-02    1.000000e+00\n",
      "country[T.Canada]     0.180755   0.613696  0.335569  3.669718e-04    1.023851e-01\n",
      "country[T.Japan]      0.188328   0.973035  0.427689  4.274253e-02    1.000000e+00\n",
      "country[T.Lithuania]  0.265156   2.452086  0.799131  6.720526e-01    1.000000e+00\n",
      "country[T.Poland]     0.193908   2.588528  0.733334  6.274920e-01    1.000000e+00\n",
      "country[T.Romania]    3.468447  19.399959  7.782408  9.255034e-08    2.582154e-05\n",
      "country[T.Russia]     0.487810   1.127150  0.741920  1.616069e-01    1.000000e+00\n",
      "country[T.Serbia]     0.814454   2.809603  1.506071  1.920911e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000265   0.365973  0.036098  2.482909e-03    6.927316e-01\n",
      "country[T.UK]         0.467865   1.277250  0.773034  3.079918e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.846196   1.586946  1.158214  3.589685e-01    1.000000e+00\n",
      "age_group             2.368912   3.280186  2.776174  1.401392e-45    3.909884e-43\n",
      "Intercept             0.055925   0.160147  0.095653  3.483769e-21    9.719716e-19\n",
      "Running logistic regression with parameter hypert, signature DBS_burden\n",
      "** Warning: Covariate country, sig DBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS_burden: 528\n",
      "All counts for signature DBS_burden: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "hypert[T.Yes]         0.917190  1.682764  1.242594  1.602136e-01    1.000000e+00\n",
      "sex[T.Male]           1.178951  2.221233  1.615212  2.780381e-03    7.757263e-01\n",
      "country[T.Brazil]     0.373963  1.054415  0.629792  7.852980e-02    1.000000e+00\n",
      "country[T.Canada]     0.231084  0.743268  0.417263  2.918612e-03    8.142927e-01\n",
      "country[T.Japan]      0.090326  0.464853  0.210079  9.412687e-05    2.626140e-02\n",
      "country[T.Lithuania]  0.492491  4.746902  1.453156  4.987405e-01    1.000000e+00\n",
      "country[T.Poland]     0.202232  2.348743  0.714310  5.797730e-01    1.000000e+00\n",
      "country[T.Romania]    1.825778  8.807235  3.861010  2.790180e-04    7.784602e-02\n",
      "country[T.Russia]     0.503868  1.162464  0.765742  2.100012e-01    1.000000e+00\n",
      "country[T.Serbia]     0.719660  2.339283  1.292848  3.905418e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000350  0.453800  0.047088  4.841794e-03    1.000000e+00\n",
      "country[T.UK]         0.620789  1.816678  1.058011  8.060522e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.784087  1.462750  1.070580  6.670072e-01    1.000000e+00\n",
      "age_group             1.737340  2.333554  2.007829  1.777678e-23    4.959722e-21\n",
      "Intercept             0.116038  0.310558  0.191352  5.791455e-12    1.615816e-09\n",
      "Running logistic regression with parameter hypert, signature ID_burden\n",
      "Zero counts for signature ID_burden: 480\n",
      "All counts for signature ID_burden: 957\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1108.0680 \n",
      "Link Function:         Logit             BIC:             -5387.6348\n",
      "Dependent Variable:    ID_burden_bool    Log-Likelihood:  -539.03   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -663.34   \n",
      "No. Observations:      957               Deviance:        1078.1    \n",
      "Df Model:              14                Pearson chi2:    961.      \n",
      "Df Residuals:          942               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.8249   0.2548 -7.1615 0.0000 -2.3243 -1.3254\n",
      "hypert[T.Yes]         0.3007   0.1539  1.9532 0.0508 -0.0010  0.6024\n",
      "sex[T.Male]           0.6384   0.1616  3.9508 0.0001  0.3217  0.9551\n",
      "country[T.Brazil]    -0.8486   0.2736 -3.1009 0.0019 -1.3849 -0.3122\n",
      "country[T.Canada]    -1.0617   0.3065 -3.4641 0.0005 -1.6624 -0.4610\n",
      "country[T.Japan]     -2.6631   0.4850 -5.4909 0.0000 -3.6137 -1.7125\n",
      "country[T.Lithuania] -0.7410   0.5727 -1.2937 0.1958 -1.8635  0.3816\n",
      "country[T.Poland]    -0.8881   0.6835 -1.2994 0.1938 -2.2277  0.4515\n",
      "country[T.Romania]    0.8268   0.3542  2.3344 0.0196  0.1326  1.5210\n",
      "country[T.Russia]    -0.3356   0.2101 -1.5971 0.1102 -0.7474  0.0762\n",
      "country[T.Serbia]    -0.1708   0.3101 -0.5509 0.5817 -0.7786  0.4370\n",
      "country[T.Thailand]  -2.3950   1.1711 -2.0451 0.0408 -4.6902 -0.0997\n",
      "country[T.UK]        -0.2511   0.2534 -0.9911 0.3216 -0.7477  0.2455\n",
      "tobacco_ever[T.Yes]  -0.1902   0.1573 -1.2090 0.2267 -0.4985  0.1181\n",
      "age_group             0.8801   0.0781 11.2623 0.0000  0.7270  1.0333\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.097854  0.265689  0.161241  7.977407e-13    2.225696e-10\n",
      "hypert[T.Yes]         0.998960  1.826429  1.350752  5.079510e-02    1.000000e+00\n",
      "sex[T.Male]           1.379480  2.599027  1.893490  7.788624e-05    2.173026e-02\n",
      "country[T.Brazil]     0.250353  0.731818  0.428033  1.929015e-03    5.381951e-01\n",
      "country[T.Canada]     0.189684  0.630659  0.345870  5.320587e-04    1.484444e-01\n",
      "country[T.Japan]      0.026953  0.180411  0.069732  3.997975e-08    1.115435e-05\n",
      "country[T.Lithuania]  0.155124  1.464634  0.476655  1.957708e-01    1.000000e+00\n",
      "country[T.Poland]     0.107778  1.570612  0.411434  1.938022e-01    1.000000e+00\n",
      "country[T.Romania]    1.141825  4.576677  2.285994  1.957302e-02    1.000000e+00\n",
      "country[T.Russia]     0.473615  1.079220  0.714937  1.102420e-01    1.000000e+00\n",
      "country[T.Serbia]     0.459034  1.548023  0.842968  5.817330e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009185  0.905069  0.091175  4.084124e-02    1.000000e+00\n",
      "country[T.UK]         0.473434  1.278233  0.777920  3.216186e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.607414  1.125407  0.826794  2.266680e-01    1.000000e+00\n",
      "age_group             2.068813  2.810370  2.411251  2.014318e-29    5.619947e-27\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using below/above median model for signature SBS1, its frequency is 0.78\n",
      "Using below/above median model for signature SBS1536A, its frequency is 0.85\n",
      "Using below/above median model for signature SBS1536B, its frequency is 0.89\n",
      "Using below/above median model for signature ID1, its frequency is 0.87\n",
      "Using below/above median model for signature ID5, its frequency is 0.93\n",
      "Running logistic regression with parameter diabetes, signature SBS1\n",
      "Zero counts for signature SBS1: 410\n",
      "All counts for signature SBS1: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1069.0997 \n",
      "Link Function:         Logit             BIC:             -4366.5996\n",
      "Dependent Variable:    SBS1_bool         Log-Likelihood:  -520.55   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -568.38   \n",
      "No. Observations:      820               Deviance:        1041.1    \n",
      "Df Model:              13                Pearson chi2:    818.      \n",
      "Df Residuals:          806               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.2825   0.2645 -4.8482 0.0000 -1.8010 -0.7640\n",
      "diabetes[T.Yes]      -0.1852   0.2098 -0.8828 0.3774 -0.5964  0.2260\n",
      "sex[T.Male]           0.0426   0.1576  0.2704 0.7869 -0.2663  0.3516\n",
      "country[T.Brazil]     0.3267   0.2682  1.2181 0.2232 -0.1990  0.8524\n",
      "country[T.Canada]     0.8583   0.3043  2.8203 0.0048  0.2618  1.4547\n",
      "country[T.Japan]      0.4262   0.3873  1.1005 0.2711 -0.3329  1.1854\n",
      "country[T.Lithuania]  0.7155   0.5548  1.2896 0.1972 -0.3720  1.8030\n",
      "country[T.Romania]   -1.1427   0.3893 -2.9356 0.0033 -1.9057 -0.3798\n",
      "country[T.Russia]     0.7192   0.2279  3.1553 0.0016  0.2724  1.1659\n",
      "country[T.Serbia]    -0.5679   0.3244 -1.7506 0.0800 -1.2036  0.0679\n",
      "country[T.Thailand]   0.4154   0.9420  0.4410 0.6592 -1.4308  2.2617\n",
      "country[T.UK]         0.5704   0.2545  2.2408 0.0250  0.0715  1.0693\n",
      "tobacco_ever[T.Yes]   0.0699   0.1572  0.4448 0.6564 -0.2382  0.3781\n",
      "age_group             0.4765   0.0703  6.7764 0.0000  0.3386  0.6143\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.165135  0.465785  0.277340  1.246122e-06    3.476681e-04\n",
      "diabetes[T.Yes]       0.550815  1.253553  0.830949  3.773701e-01    1.000000e+00\n",
      "sex[T.Male]           0.766184  1.421313  1.043545  7.868542e-01    1.000000e+00\n",
      "country[T.Brazil]     0.819556  2.345260  1.386388  2.232043e-01    1.000000e+00\n",
      "country[T.Canada]     1.299300  4.283392  2.359112  4.797550e-03    1.000000e+00\n",
      "country[T.Japan]      0.716853  3.271925  1.531499  2.711115e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.689377  6.067821  2.045242  1.972000e-01    1.000000e+00\n",
      "country[T.Romania]    0.148722  0.684006  0.318946  3.328824e-03    9.287420e-01\n",
      "country[T.Russia]     1.313173  3.208704  2.052702  1.603129e-03    4.472731e-01\n",
      "country[T.Serbia]     0.300099  1.070267  0.566732  8.001181e-02    1.000000e+00\n",
      "country[T.Thailand]   0.239110  9.599218  1.515015  6.592051e-01    1.000000e+00\n",
      "country[T.UK]         1.074102  2.913347  1.768964  2.503943e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.788049  1.459449  1.072435  6.564421e-01    1.000000e+00\n",
      "age_group             1.403047  1.848281  1.610350  1.231675e-11    3.436374e-09\n",
      "Running logistic regression with parameter diabetes, signature SBS2\n",
      "** Warning: Covariate country, sig SBS2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter diabetes, sig SBS2, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS2: 815\n",
      "All counts for signature SBS2: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.003162    3.715062   0.397945  3.581866e-01         1.00000\n",
      "sex[T.Male]           0.375949   22.601234   2.169509  3.059516e-01         1.00000\n",
      "country[T.Brazil]     0.004623   13.017559   0.679981  4.617929e-01         1.00000\n",
      "country[T.Canada]     0.004691   13.332284   0.692200  7.900437e-01         1.00000\n",
      "country[T.Japan]      0.007884   22.852267   1.170451  4.353916e-01         1.00000\n",
      "country[T.Lithuania]  0.028940   88.784778   4.344680  2.728963e-01         1.00000\n",
      "country[T.Romania]    0.006464   18.869689   0.962157  4.953579e-01         1.00000\n",
      "country[T.Russia]     0.191999   18.762248   1.580957  4.635728e-01         1.00000\n",
      "country[T.Serbia]     0.006158   17.976471   0.915054  4.511639e-01         1.00000\n",
      "country[T.Thailand]   0.096054  497.735432  16.195920  1.423069e-01         1.00000\n",
      "country[T.UK]         0.283631   24.304622   2.187833  2.606392e-01         1.00000\n",
      "tobacco_ever[T.Yes]   0.252008    8.770878   1.356365  4.655977e-01         1.00000\n",
      "age_group             0.601306    3.114702   1.310883  3.969729e-01         1.00000\n",
      "Intercept             0.000085    0.049706   0.003382  1.431772e-07         0.00004\n",
      "Running logistic regression with parameter diabetes, signature SBS4\n",
      "Zero counts for signature SBS4: 355\n",
      "All counts for signature SBS4: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1072.4604 \n",
      "Link Function:         Logit             BIC:             -4363.2389\n",
      "Dependent Variable:    SBS4_bool         Log-Likelihood:  -522.23   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -560.98   \n",
      "No. Observations:      820               Deviance:        1044.5    \n",
      "Df Model:              13                Pearson chi2:    813.      \n",
      "Df Residuals:          806               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.4208   0.2579 -1.6317 0.1027 -0.9263  0.0846\n",
      "diabetes[T.Yes]       0.0899   0.2139  0.4203 0.6743 -0.3293  0.5090\n",
      "sex[T.Male]          -0.2411   0.1572 -1.5337 0.1251 -0.5492  0.0670\n",
      "country[T.Brazil]    -0.5449   0.2716 -2.0062 0.0448 -1.0772 -0.0126\n",
      "country[T.Canada]    -0.4781   0.2976 -1.6066 0.1081 -1.0614  0.1052\n",
      "country[T.Japan]      0.9286   0.4850  1.9148 0.0555 -0.0219  1.8792\n",
      "country[T.Lithuania] -0.1284   0.5551 -0.2313 0.8171 -1.2163  0.9595\n",
      "country[T.Romania]   -0.8953   0.3432 -2.6087 0.0091 -1.5679 -0.2226\n",
      "country[T.Russia]    -0.1743   0.2272 -0.7670 0.4431 -0.6196  0.2710\n",
      "country[T.Serbia]    -0.4430   0.3018 -1.4681 0.1421 -1.0344  0.1484\n",
      "country[T.Thailand]  -0.2844   0.9398 -0.3026 0.7622 -2.1264  1.5575\n",
      "country[T.UK]         0.0396   0.2634  0.1502 0.8806 -0.4766  0.5558\n",
      "tobacco_ever[T.Yes]   0.6999   0.1571  4.4536 0.0000  0.3919  1.0079\n",
      "age_group             0.3535   0.0683  5.1787 0.0000  0.2197  0.4872\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.396030  1.088336  0.656516  1.027372e-01        1.000000\n",
      "diabetes[T.Yes]       0.719447  1.663680  1.094042  6.742824e-01        1.000000\n",
      "sex[T.Male]           0.577389  1.069318  0.785756  1.251142e-01        1.000000\n",
      "country[T.Brazil]     0.340534  0.987512  0.579898  4.483164e-02        1.000000\n",
      "country[T.Canada]     0.345961  1.110888  0.619938  1.081400e-01        1.000000\n",
      "country[T.Japan]      0.978352  6.547993  2.531055  5.551354e-02        1.000000\n",
      "country[T.Lithuania]  0.296322  2.610386  0.879497  8.170556e-01        1.000000\n",
      "country[T.Romania]    0.208489  0.800406  0.408505  9.088344e-03        1.000000\n",
      "country[T.Russia]     0.538173  1.311322  0.840071  4.430646e-01        1.000000\n",
      "country[T.Serbia]     0.355424  1.160001  0.642100  1.420748e-01        1.000000\n",
      "country[T.Thailand]   0.119269  4.747145  0.752455  7.621676e-01        1.000000\n",
      "country[T.UK]         0.620860  1.743324  1.040365  8.805719e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.479729  2.739725  2.013468  8.445262e-06        0.002356\n",
      "age_group             1.245682  1.627788  1.423976  2.234143e-07        0.000062\n",
      "Running logistic regression with parameter diabetes, signature SBS5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Zero counts for signature SBS5: 757\n",
      "All counts for signature SBS5: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             432.7128  \n",
      "Link Function:         Logit             BIC:             -5002.9865\n",
      "Dependent Variable:    SBS5_bool         Log-Likelihood:  -202.36   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -222.18   \n",
      "No. Observations:      820               Deviance:        404.71    \n",
      "Df Model:              13                Pearson chi2:    798.      \n",
      "Df Residuals:          806               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -3.3725   0.5104 -6.6082 0.0000 -4.3728 -2.3723\n",
      "diabetes[T.Yes]       0.3288   0.3240  1.0149 0.3102 -0.3062  0.9638\n",
      "sex[T.Male]          -0.4511   0.2883 -1.5645 0.1177 -1.0162  0.1140\n",
      "country[T.Brazil]     0.4117   0.4190  0.9825 0.3259 -0.4096  1.2330\n",
      "country[T.Canada]    -0.3211   0.5426 -0.5917 0.5541 -1.3846  0.7425\n",
      "country[T.Japan]      0.5911   0.5373  1.1001 0.2713 -0.4620  1.6443\n",
      "country[T.Lithuania]  0.6108   0.7136  0.8558 0.3921 -0.7880  2.0095\n",
      "country[T.Romania]   -0.8429   0.7762 -1.0858 0.2776 -2.3643  0.6786\n",
      "country[T.Russia]    -0.8878   0.5090 -1.7442 0.0811 -1.8854  0.1098\n",
      "country[T.Serbia]    -0.2859   0.5920 -0.4829 0.6292 -1.4462  0.8745\n",
      "country[T.Thailand]   0.4587   1.1818  0.3881 0.6979 -1.8576  2.7750\n",
      "country[T.UK]        -0.3706   0.4618 -0.8025 0.4223 -1.2756  0.5345\n",
      "tobacco_ever[T.Yes]   0.1878   0.2932  0.6407 0.5217 -0.3868  0.7624\n",
      "age_group             0.4941   0.1341  3.6858 0.0002  0.2314  0.7568\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.012616   0.093270  0.034303  3.889810e-11    1.085257e-08\n",
      "diabetes[T.Yes]       0.736250   2.621681  1.389321  3.101513e-01    1.000000e+00\n",
      "sex[T.Male]           0.361964   1.120781  0.636932  1.177010e-01    1.000000e+00\n",
      "country[T.Brazil]     0.663909   3.431502  1.509373  3.258690e-01    1.000000e+00\n",
      "country[T.Canada]     0.250415   2.101196  0.725376  5.540769e-01    1.000000e+00\n",
      "country[T.Japan]      0.629991   5.177419  1.806026  2.712927e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.454773   7.459435  1.841833  3.920896e-01    1.000000e+00\n",
      "country[T.Romania]    0.094018   1.971028  0.430478  2.775610e-01    1.000000e+00\n",
      "country[T.Russia]     0.151775   1.116050  0.411568  8.111631e-02    1.000000e+00\n",
      "country[T.Serbia]     0.235458   2.397629  0.751359  6.291885e-01    1.000000e+00\n",
      "country[T.Thailand]   0.156049  16.038130  1.582001  6.979200e-01    1.000000e+00\n",
      "country[T.UK]         0.279251   1.706622  0.690345  4.222861e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.679257   2.143419  1.206621  5.217235e-01    1.000000e+00\n",
      "age_group             1.260308   2.131503  1.639009  2.279489e-04    6.359775e-02\n",
      "Running logistic regression with parameter diabetes, signature SBS12\n",
      "** Warning: Covariate country, sig SBS12, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS12: 777\n",
      "All counts for signature SBS12: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]        0.947356    6.433276    2.510815  6.193328e-02    1.000000e+00\n",
      "sex[T.Male]            0.361645    2.074888    0.861375  6.966263e-01    1.000000e+00\n",
      "country[T.Brazil]      0.373479    8.809852    1.813676  4.098485e-01    1.000000e+00\n",
      "country[T.Canada]      1.058283   18.478458    4.136316  4.087012e-02    1.000000e+00\n",
      "country[T.Japan]      40.862906  618.728967  138.338773  1.130939e-21    3.155321e-19\n",
      "country[T.Lithuania]   0.006976   11.193948    0.968931  6.899766e-01    1.000000e+00\n",
      "country[T.Romania]     0.154187   10.393822    1.633477  5.694039e-01    1.000000e+00\n",
      "country[T.Russia]      0.116654    3.950946    0.729779  6.362025e-01    1.000000e+00\n",
      "country[T.Serbia]      0.118780    7.958966    1.255733  6.880463e-01    1.000000e+00\n",
      "country[T.Thailand]    0.017580   34.633871    2.558751  5.449888e-01    1.000000e+00\n",
      "country[T.UK]          0.187992    6.063150    1.151189  6.878882e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.275793    1.593643    0.672024  3.549939e-01    1.000000e+00\n",
      "age_group              0.837301    1.736582    1.198068  3.133024e-01    1.000000e+00\n",
      "Intercept              0.002770    0.061708    0.014909  1.168513e-11    3.260150e-09\n",
      "Running logistic regression with parameter diabetes, signature SBS13\n",
      "** Warning: Covariate country, sig SBS13, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS13: 684\n",
      "All counts for signature SBS13: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.422723  1.280863  0.752494  3.001685e-01    1.000000e+00\n",
      "sex[T.Male]           1.130824  2.591564  1.699601  1.033665e-02    1.000000e+00\n",
      "country[T.Brazil]     0.446627  1.883988  0.934976  8.253466e-01    1.000000e+00\n",
      "country[T.Canada]     0.431767  1.999395  0.953456  8.989795e-01    1.000000e+00\n",
      "country[T.Japan]      0.124677  1.422616  0.486842  1.986224e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.394662  5.299640  1.629200  4.570437e-01    1.000000e+00\n",
      "country[T.Romania]    0.054973  0.903223  0.279112  3.101383e-02    1.000000e+00\n",
      "country[T.Russia]     1.057159  3.183500  1.818210  3.033221e-02    1.000000e+00\n",
      "country[T.Serbia]     0.326394  1.777469  0.795454  5.742219e-01    1.000000e+00\n",
      "country[T.Thailand]   0.004969  6.280174  0.662913  7.407899e-01    1.000000e+00\n",
      "country[T.UK]         0.701343  2.430480  1.308537  3.868802e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.836080  1.835281  1.236734  2.842884e-01    1.000000e+00\n",
      "age_group             0.904788  1.278266  1.074418  4.106433e-01    1.000000e+00\n",
      "Intercept             0.052973  0.207971  0.107004  3.183365e-12    8.881590e-10\n",
      "Running logistic regression with parameter diabetes, signature SBS18\n",
      "** Warning: Covariate country, sig SBS18, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS18: 766\n",
      "All counts for signature SBS18: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.401456   1.960389  0.936092  8.006185e-01    1.000000e+00\n",
      "sex[T.Male]           1.197184   4.384536  2.225062  1.038969e-02    1.000000e+00\n",
      "country[T.Brazil]     0.668411   5.047777  1.836691  2.256067e-01    1.000000e+00\n",
      "country[T.Canada]     0.350038   3.966753  1.256714  7.049992e-01    1.000000e+00\n",
      "country[T.Japan]      0.246466   5.247097  1.355408  6.235455e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.681377  16.640273  3.933399  1.094633e-01    1.000000e+00\n",
      "country[T.Romania]    0.530704   6.186423  1.928407  2.835740e-01    1.000000e+00\n",
      "country[T.Russia]     0.529897   3.315615  1.291465  5.484908e-01    1.000000e+00\n",
      "country[T.Serbia]     0.483317   4.788686  1.581101  4.022797e-01    1.000000e+00\n",
      "country[T.Thailand]   0.018916  27.937462  2.594837  5.325943e-01    1.000000e+00\n",
      "country[T.UK]         0.563361   4.185694  1.535145  3.695109e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.474676   1.482194  0.840144  5.146400e-01    1.000000e+00\n",
      "age_group             0.668538   1.098082  0.857089  2.160286e-01    1.000000e+00\n",
      "Intercept             0.015915   0.124359  0.047092  6.697178e-12    1.868513e-09\n",
      "Running logistic regression with parameter diabetes, signature SBS21\n",
      "** Warning: Covariate country, sig SBS21, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter diabetes, sig SBS21, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS21: 816\n",
      "All counts for signature SBS21: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%        97.5%         OR   p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.006070     7.451353   0.763821  0.586070        1.000000\n",
      "sex[T.Male]           0.112240     4.564401   0.713607  0.454080        1.000000\n",
      "country[T.Brazil]     0.007647   267.882697   1.432073  0.511908        1.000000\n",
      "country[T.Canada]     0.012184   425.103906   2.275469  0.640778        1.000000\n",
      "country[T.Japan]      0.025390   887.186786   4.745545  0.310059        1.000000\n",
      "country[T.Lithuania]  0.058322  2212.280149  11.379070  0.203710        1.000000\n",
      "country[T.Romania]    0.476153  1376.603720   9.289919  0.086699        1.000000\n",
      "country[T.Russia]     0.092065   267.263904   1.797689  0.438932        1.000000\n",
      "country[T.Serbia]     0.009762   341.386731   1.825667  0.408187        1.000000\n",
      "country[T.Thailand]   0.157081  7399.729490  33.738789  0.092386        1.000000\n",
      "country[T.UK]         0.577723  1007.502653   7.258274  0.105752        1.000000\n",
      "tobacco_ever[T.Yes]   0.159815     7.043196   1.054476  0.522314        1.000000\n",
      "age_group             0.276131     1.534702   0.668675  0.271760        1.000000\n",
      "Intercept             0.000064     0.142002   0.010355  0.000030        0.008358\n",
      "Running logistic regression with parameter diabetes, signature SBS22\n",
      "** Warning: Covariate country, sig SBS22, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS22: 759\n",
      "All counts for signature SBS22: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%        97.5%          OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]        0.498289     4.046989    1.464231  4.363974e-01    1.000000e+00\n",
      "sex[T.Male]            0.247315     1.183079    0.546641  1.195727e-01    1.000000e+00\n",
      "country[T.Brazil]      0.792277    52.872298    5.002387  7.803673e-02    1.000000e+00\n",
      "country[T.Canada]      0.619644    54.224942    4.827258  1.261636e-01    1.000000e+00\n",
      "country[T.Japan]       0.010958    31.938848    1.628099  5.665244e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.016598    51.562523    2.516358  4.461456e-01    1.000000e+00\n",
      "country[T.Romania]    89.690269  4096.961807  414.697070  7.928324e-29    2.212003e-26\n",
      "country[T.Russia]      0.002786     7.952538    0.411429  4.960308e-01    1.000000e+00\n",
      "country[T.Serbia]     12.745040   542.642468   56.692499  1.684328e-10    4.699276e-08\n",
      "country[T.Thailand]   11.171735  1586.659333  105.297605  5.700736e-05    1.590505e-02\n",
      "country[T.UK]          0.130416    20.691257    1.642224  5.152917e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.355707     1.724542    0.783541  5.060996e-01    1.000000e+00\n",
      "age_group              1.388188     2.810581    1.940162  5.653279e-05    1.577265e-02\n",
      "Intercept              0.000198     0.013231    0.002259  1.433908e-19    4.000603e-17\n",
      "Running logistic regression with parameter diabetes, signature SBS44\n",
      "** Warning: Covariate country, sig SBS44, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter diabetes, sig SBS44, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS44: 815\n",
      "All counts for signature SBS44: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%        97.5%         OR   p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.004258     4.817847   0.530747  0.486606         1.00000\n",
      "sex[T.Male]           0.194008     5.826400   0.987480  0.518317         1.00000\n",
      "country[T.Brazil]     0.255492   724.281820   4.914723  0.219363         1.00000\n",
      "country[T.Canada]     0.011188   388.361022   2.084560  0.693396         1.00000\n",
      "country[T.Japan]      0.022861   797.305314   4.269376  0.323397         1.00000\n",
      "country[T.Lithuania]  0.063759  2286.274993  12.050451  0.201312         1.00000\n",
      "country[T.Romania]    0.520773  1492.046805  10.085540  0.080530         1.00000\n",
      "country[T.Russia]     0.104390   299.766971   2.023588  0.440959         1.00000\n",
      "country[T.Serbia]     0.010477   366.145495   1.958625  0.416487         1.00000\n",
      "country[T.Thailand]   0.208481  9734.302023  44.427917  0.080827         1.00000\n",
      "country[T.UK]         0.554318   967.992699   6.970706  0.109129         1.00000\n",
      "tobacco_ever[T.Yes]   0.292989     9.337525   1.538476  0.416718         1.00000\n",
      "age_group             0.342047     1.586779   0.747103  0.353370         1.00000\n",
      "Intercept             0.000036     0.080101   0.005722  0.000001         0.00034\n",
      "Running logistic regression with parameter diabetes, signature SBS1536A\n",
      "Zero counts for signature SBS1536A: 410\n",
      "All counts for signature SBS1536A: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             983.2053  \n",
      "Link Function:         Logit             BIC:             -4452.4939\n",
      "Dependent Variable:    SBS1536A_bool     Log-Likelihood:  -477.60   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -568.38   \n",
      "No. Observations:      820               Deviance:        955.21    \n",
      "Df Model:              13                Pearson chi2:    811.      \n",
      "Df Residuals:          806               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.3837   0.2830 -4.8898 0.0000 -1.9383 -0.8290\n",
      "diabetes[T.Yes]       0.0266   0.2216  0.1201 0.9044 -0.4078  0.4610\n",
      "sex[T.Male]           0.7861   0.1689  4.6546 0.0000  0.4551  1.1171\n",
      "country[T.Brazil]    -1.0095   0.2901 -3.4795 0.0005 -1.5781 -0.4408\n",
      "country[T.Canada]    -1.0355   0.3176 -3.2609 0.0011 -1.6579 -0.4131\n",
      "country[T.Japan]     -2.7140   0.4743 -5.7214 0.0000 -3.6437 -1.7842\n",
      "country[T.Lithuania]  0.3691   0.6298  0.5860 0.5579 -0.8653  1.6035\n",
      "country[T.Romania]   -0.6653   0.3692 -1.8018 0.0716 -1.3889  0.0584\n",
      "country[T.Russia]    -0.8519   0.2422 -3.5172 0.0004 -1.3267 -0.3772\n",
      "country[T.Serbia]    -0.4753   0.3208 -1.4815 0.1385 -1.1041  0.1535\n",
      "country[T.Thailand]  -2.3618   1.1576 -2.0402 0.0413 -4.6308 -0.0929\n",
      "country[T.UK]        -0.4878   0.2746 -1.7762 0.0757 -1.0260  0.0505\n",
      "tobacco_ever[T.Yes]   0.2159   0.1653  1.3060 0.1916 -0.1081  0.5398\n",
      "age_group             0.7421   0.0784  9.4603 0.0000  0.5884  0.8959\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.143953  0.436466  0.250661  1.009628e-06    2.816862e-04\n",
      "diabetes[T.Yes]       0.665145  1.585596  1.026962  9.044427e-01    1.000000e+00\n",
      "sex[T.Male]           1.576306  3.055954  2.194793  3.245490e-06    9.054917e-04\n",
      "country[T.Brazil]     0.206371  0.643498  0.364417  5.024133e-04    1.401733e-01\n",
      "country[T.Canada]     0.190537  0.661582  0.355043  1.110533e-03    3.098388e-01\n",
      "country[T.Japan]      0.026156  0.167923  0.066274  1.056229e-08    2.946878e-06\n",
      "country[T.Lithuania]  0.420908  4.970264  1.446382  5.578797e-01    1.000000e+00\n",
      "country[T.Romania]    0.249338  1.060135  0.514133  7.157616e-02    1.000000e+00\n",
      "country[T.Russia]     0.265357  0.685779  0.426587  4.360701e-04    1.216636e-01\n",
      "country[T.Serbia]     0.331524  1.165906  0.621711  1.384749e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009747  0.911273  0.094247  4.132796e-02    1.000000e+00\n",
      "country[T.UK]         0.358430  1.051770  0.613992  7.570552e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.897539  1.715739  1.240944  1.915572e-01    1.000000e+00\n",
      "age_group             1.801046  2.449464  2.100380  3.071094e-21    8.568353e-19\n",
      "Running logistic regression with parameter diabetes, signature SBS1536B\n",
      "Zero counts for signature SBS1536B: 410\n",
      "All counts for signature SBS1536B: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1082.6407 \n",
      "Link Function:         Logit             BIC:             -4353.0586\n",
      "Dependent Variable:    SBS1536B_bool     Log-Likelihood:  -527.32   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -568.38   \n",
      "No. Observations:      820               Deviance:        1054.6    \n",
      "Df Model:              13                Pearson chi2:    819.      \n",
      "Df Residuals:          806               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.3200   0.2650 -4.9819 0.0000 -1.8392 -0.8007\n",
      "diabetes[T.Yes]      -0.2794   0.2072 -1.3486 0.1775 -0.6856  0.1267\n",
      "sex[T.Male]          -0.0176   0.1564 -0.1127 0.9103 -0.3241  0.2889\n",
      "country[T.Brazil]    -0.4696   0.2770 -1.6953 0.0900 -1.0125  0.0733\n",
      "country[T.Canada]    -0.3276   0.2980 -1.0994 0.2716 -0.9116  0.2564\n",
      "country[T.Japan]     -0.3343   0.3862 -0.8656 0.3867 -1.0913  0.4227\n",
      "country[T.Lithuania] -0.0007   0.5438 -0.0014 0.9989 -1.0665  1.0650\n",
      "country[T.Romania]    0.0432   0.3428  0.1260 0.8998 -0.6286  0.7150\n",
      "country[T.Russia]     0.5222   0.2285  2.2857 0.0223  0.0744  0.9700\n",
      "country[T.Serbia]     0.2685   0.3047  0.8812 0.3782 -0.3287  0.8656\n",
      "country[T.Thailand]  -0.5603   0.9411 -0.5954 0.5516 -2.4047  1.2841\n",
      "country[T.UK]         0.3658   0.2571  1.4227 0.1548 -0.1381  0.8697\n",
      "tobacco_ever[T.Yes]   0.4080   0.1568  2.6010 0.0093  0.1005  0.7154\n",
      "age_group             0.5356   0.0706  7.5808 0.0000  0.3971  0.6740\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.158937  0.449033  0.267148  6.296919e-07    1.756840e-04\n",
      "diabetes[T.Yes]       0.503801  1.135047  0.756200  1.774556e-01    1.000000e+00\n",
      "sex[T.Male]           0.723164  1.334932  0.982535  9.102917e-01    1.000000e+00\n",
      "country[T.Brazil]     0.363295  1.076084  0.625248  9.002736e-02    1.000000e+00\n",
      "country[T.Canada]     0.401872  1.292321  0.720658  2.716069e-01    1.000000e+00\n",
      "country[T.Japan]      0.335767  1.526086  0.715828  3.867323e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.344220  2.900835  0.999262  9.989172e-01    1.000000e+00\n",
      "country[T.Romania]    0.533319  2.044155  1.044120  8.997637e-01    1.000000e+00\n",
      "country[T.Russia]     1.077265  2.637819  1.685714  2.226929e-02    1.000000e+00\n",
      "country[T.Serbia]     0.719882  2.376478  1.307969  3.782094e-01    1.000000e+00\n",
      "country[T.Thailand]   0.090292  3.611584  0.571050  5.515927e-01    1.000000e+00\n",
      "country[T.UK]         0.870978  2.386202  1.441641  1.548277e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   1.105777  2.044962  1.503752  9.294715e-03    1.000000e+00\n",
      "age_group             1.487496  1.962120  1.708405  3.434798e-14    9.583088e-12\n",
      "Running logistic regression with parameter diabetes, signature SBS1536F\n",
      "Zero counts for signature SBS1536F: 716\n",
      "All counts for signature SBS1536F: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             635.4291  \n",
      "Link Function:         Logit             BIC:             -4800.2702\n",
      "Dependent Variable:    SBS1536F_bool     Log-Likelihood:  -303.71   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -311.86   \n",
      "No. Observations:      820               Deviance:        607.43    \n",
      "Df Model:              13                Pearson chi2:    824.      \n",
      "Df Residuals:          806               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -2.2427   0.3943 -5.6872 0.0000 -3.0156 -1.4698\n",
      "diabetes[T.Yes]       0.1648   0.2907  0.5668 0.5709 -0.4050  0.7346\n",
      "sex[T.Male]           0.0745   0.2267  0.3286 0.7425 -0.3698  0.5188\n",
      "country[T.Brazil]     0.4499   0.4059  1.1082 0.2678 -0.3458  1.2455\n",
      "country[T.Canada]     1.0452   0.4004  2.6106 0.0090  0.2605  1.8299\n",
      "country[T.Japan]      0.5405   0.5566  0.9712 0.3314 -0.5503  1.6314\n",
      "country[T.Lithuania]  0.8002   0.7032  1.1380 0.2551 -0.5780  2.1784\n",
      "country[T.Romania]   -0.8565   0.7730 -1.1081 0.2678 -2.3715  0.6585\n",
      "country[T.Russia]     0.5721   0.3457  1.6551 0.0979 -0.1054  1.2497\n",
      "country[T.Serbia]    -0.2147   0.5415 -0.3965 0.6918 -1.2759  0.8466\n",
      "country[T.Thailand]   0.8456   1.1581  0.7302 0.4653 -1.4242  3.1155\n",
      "country[T.UK]         0.4575   0.3891  1.1760 0.2396 -0.3050  1.2201\n",
      "tobacco_ever[T.Yes]  -0.2992   0.2267 -1.3197 0.1869 -0.7436  0.1452\n",
      "age_group            -0.0042   0.0974 -0.0433 0.9655 -0.1952  0.1867\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.049017   0.229970  0.106172  1.291171e-08        0.000004\n",
      "diabetes[T.Yes]       0.666963   2.084590  1.179129  5.708576e-01        1.000000\n",
      "sex[T.Male]           0.690842   1.680047  1.077333  7.424789e-01        1.000000\n",
      "country[T.Brazil]     0.707682   3.474683  1.568110  2.677690e-01        1.000000\n",
      "country[T.Canada]     1.297551   6.233308  2.843947  9.039526e-03        1.000000\n",
      "country[T.Japan]      0.576786   5.110872  1.716939  3.314332e-01        1.000000\n",
      "country[T.Lithuania]  0.561029   8.831996  2.225985  2.551232e-01        1.000000\n",
      "country[T.Romania]    0.093344   1.931800  0.424644  2.678202e-01        1.000000\n",
      "country[T.Russia]     0.899991   3.489124  1.772056  9.789619e-02        1.000000\n",
      "country[T.Serbia]     0.279177   2.331604  0.806802  6.917500e-01        1.000000\n",
      "country[T.Thailand]   0.240701  22.544641  2.329489  4.652684e-01        1.000000\n",
      "country[T.UK]         0.737105   3.387440  1.580158  2.396082e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.475385   1.156224  0.741385  1.869188e-01        1.000000\n",
      "age_group             0.822703   1.205303  0.995794  9.654909e-01        1.000000\n",
      "Running logistic regression with parameter diabetes, signature SBS1536I\n",
      "** Warning: Covariate country, sig SBS1536I, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536I: 738\n",
      "All counts for signature SBS1536I: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                            2.5%        97.5%          OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]         0.378064     2.835100    1.055744  5.675711e-01    1.000000e+00\n",
      "sex[T.Male]             0.416090     1.881774    0.884945  5.299144e-01    1.000000e+00\n",
      "country[T.Brazil]       0.616139    20.486016    3.294724  1.239257e-01    1.000000e+00\n",
      "country[T.Canada]       0.003724     6.582647    0.518608  6.367001e-01    1.000000e+00\n",
      "country[T.Japan]        0.215546    19.768468    2.466758  3.046415e-01    1.000000e+00\n",
      "country[T.Lithuania]    0.523755    53.031349    6.255952  9.738053e-02    1.000000e+00\n",
      "country[T.Romania]    100.271131  2342.885479  395.400692  1.571423e-31    4.384271e-29\n",
      "country[T.Russia]       0.076617     6.762140    0.862416  5.278908e-01    1.000000e+00\n",
      "country[T.Serbia]      38.810745   785.266121  141.327681  9.690832e-23    2.703742e-20\n",
      "country[T.Thailand]     9.999707   834.124553   80.898546  5.948856e-05    1.659731e-02\n",
      "country[T.UK]           0.002170     3.820384    0.301969  2.701763e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]     0.447076     2.007622    0.943761  5.739070e-01    1.000000e+00\n",
      "age_group               1.641493     3.340558    2.297397  2.240637e-07    6.251378e-05\n",
      "Intercept               0.000246     0.009161    0.001805  1.030727e-23    2.875729e-21\n",
      "Running logistic regression with parameter diabetes, signature DBS2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Zero counts for signature DBS2: 492\n",
      "All counts for signature DBS2: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1081.7232 \n",
      "Link Function:         Logit             BIC:             -4353.9761\n",
      "Dependent Variable:    DBS2_bool         Log-Likelihood:  -526.86   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -551.87   \n",
      "No. Observations:      820               Deviance:        1053.7    \n",
      "Df Model:              13                Pearson chi2:    820.      \n",
      "Df Residuals:          806               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.3922   0.2642 -5.2702 0.0000 -1.9100 -0.8744\n",
      "diabetes[T.Yes]      -0.0422   0.2049 -0.2061 0.8367 -0.4438  0.3594\n",
      "sex[T.Male]           0.2349   0.1574  1.4927 0.1355 -0.0735  0.5434\n",
      "country[T.Brazil]    -0.1161   0.2718 -0.4270 0.6693 -0.6488  0.4166\n",
      "country[T.Canada]    -0.3056   0.2968 -1.0294 0.3033 -0.8873  0.2762\n",
      "country[T.Japan]     -0.8213   0.4061 -2.0224 0.0431 -1.6172 -0.0254\n",
      "country[T.Lithuania]  0.0875   0.5409  0.1618 0.8715 -0.9726  1.1476\n",
      "country[T.Romania]    0.1568   0.3385  0.4632 0.6432 -0.5066  0.8201\n",
      "country[T.Russia]    -0.1404   0.2292 -0.6125 0.5402 -0.5896  0.3088\n",
      "country[T.Serbia]     0.4647   0.2989  1.5548 0.1200 -0.1211  1.0506\n",
      "country[T.Thailand]   0.0521   0.9423  0.0553 0.9559 -1.7948  1.8991\n",
      "country[T.UK]        -0.0938   0.2524 -0.3717 0.7101 -0.5885  0.4009\n",
      "tobacco_ever[T.Yes]   0.7480   0.1566  4.7758 0.0000  0.4410  1.0550\n",
      "age_group             0.2666   0.0682  3.9102 0.0001  0.1330  0.4003\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.148087  0.417095  0.248528  1.363118e-07        0.000038\n",
      "diabetes[T.Yes]       0.641580  1.432416  0.958650  8.367132e-01        1.000000\n",
      "sex[T.Male]           0.929098  1.721813  1.264805  1.355214e-01        1.000000\n",
      "country[T.Brazil]     0.522690  1.516844  0.890415  6.693448e-01        1.000000\n",
      "country[T.Canada]     0.411752  1.318116  0.736707  3.032724e-01        1.000000\n",
      "country[T.Japan]      0.198455  0.974965  0.439872  4.313528e-02        1.000000\n",
      "country[T.Lithuania]  0.378106  3.150490  1.091431  8.714955e-01        1.000000\n",
      "country[T.Romania]    0.602553  2.270775  1.169728  6.432194e-01        1.000000\n",
      "country[T.Russia]     0.554563  1.361807  0.869027  5.401903e-01        1.000000\n",
      "country[T.Serbia]     0.885933  2.859226  1.591566  1.200019e-01        1.000000\n",
      "country[T.Thailand]   0.166164  6.679612  1.053523  9.558750e-01        1.000000\n",
      "country[T.UK]         0.555134  1.493156  0.910441  7.101014e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.554292  2.871876  2.112755  1.789950e-06        0.000499\n",
      "age_group             1.142230  1.492241  1.305558  9.223469e-05        0.025733\n",
      "Running logistic regression with parameter diabetes, signature DBS4\n",
      "** Warning: Covariate country, sig DBS4, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS4: 749\n",
      "All counts for signature DBS4: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.472428   1.784040  0.947489  8.236371e-01    1.000000e+00\n",
      "sex[T.Male]           0.562958   1.585000  0.940029  7.896371e-01    1.000000e+00\n",
      "country[T.Brazil]     0.313591   1.768721  0.773537  5.335877e-01    1.000000e+00\n",
      "country[T.Canada]     0.433024   2.489550  1.076912  8.593240e-01    1.000000e+00\n",
      "country[T.Japan]      0.105965   1.860543  0.549582  3.544047e-01    1.000000e+00\n",
      "country[T.Lithuania]  1.037004  10.509940  3.456935  4.278839e-02    1.000000e+00\n",
      "country[T.Romania]    0.074112   1.266247  0.380896  1.216259e-01    1.000000e+00\n",
      "country[T.Russia]     0.382349   1.597576  0.783345  4.883927e-01    1.000000e+00\n",
      "country[T.Serbia]     0.060105   1.014992  0.307622  5.227147e-02    1.000000e+00\n",
      "country[T.Thailand]   0.004089   5.308667  0.548704  6.436903e-01    1.000000e+00\n",
      "country[T.UK]         0.251449   1.398761  0.616588  2.442558e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.460983   1.316066  0.781487  3.490417e-01    1.000000e+00\n",
      "age_group             0.986477   1.556960  1.235031  6.494161e-02    1.000000e+00\n",
      "Intercept             0.040479   0.221953  0.097865  3.310378e-09    9.235955e-07\n",
      "Running logistic regression with parameter diabetes, signature DBS9\n",
      "** Warning: Covariate country, sig DBS9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS9: 790\n",
      "All counts for signature DBS9: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.339902   2.496800  0.999393  8.304572e-01        1.000000\n",
      "sex[T.Male]           0.579776   2.755241  1.236840  5.605347e-01        1.000000\n",
      "country[T.Brazil]     0.228127   2.459170  0.805507  6.526698e-01        1.000000\n",
      "country[T.Canada]     0.334212   3.622710  1.181772  7.679994e-01        1.000000\n",
      "country[T.Japan]      0.001845   1.987040  0.240283  2.159832e-01        1.000000\n",
      "country[T.Lithuania]  0.164367   8.073500  1.628985  5.587695e-01        1.000000\n",
      "country[T.Romania]    0.001238   1.313192  0.160833  9.570229e-02        1.000000\n",
      "country[T.Russia]     0.214497   1.685734  0.610630  3.238919e-01        1.000000\n",
      "country[T.Serbia]     0.213047   2.922611  0.882361  7.268295e-01        1.000000\n",
      "country[T.Thailand]   0.010660  15.145569  1.453753  7.076020e-01        1.000000\n",
      "country[T.UK]         0.072439   1.390700  0.384510  1.428950e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.299373   1.394336  0.655205  2.627812e-01        1.000000\n",
      "age_group             0.718164   1.372778  0.990112  8.246358e-01        1.000000\n",
      "Intercept             0.019818   0.208349  0.068885  2.935496e-07        0.000082\n",
      "Running logistic regression with parameter diabetes, signature DBS78C\n",
      "** Warning: Covariate country, sig DBS78C, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78C: 738\n",
      "All counts for signature DBS78C: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.547062  1.859398  1.035274  8.810283e-01    1.000000e+00\n",
      "sex[T.Male]           0.902304  2.480993  1.478520  1.211194e-01    1.000000e+00\n",
      "country[T.Brazil]     0.450064  2.486294  1.085011  8.181795e-01    1.000000e+00\n",
      "country[T.Canada]     0.909675  4.419927  2.018384  8.320509e-02    1.000000e+00\n",
      "country[T.Japan]      0.111748  2.011908  0.584278  4.143991e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.099558  4.283627  0.948644  8.597784e-01    1.000000e+00\n",
      "country[T.Romania]    0.274639  2.608169  0.930724  8.553771e-01    1.000000e+00\n",
      "country[T.Russia]     0.456452  2.020379  0.959401  8.793967e-01    1.000000e+00\n",
      "country[T.Serbia]     0.379801  2.689496  1.066023  8.430807e-01    1.000000e+00\n",
      "country[T.Thailand]   0.007074  9.270348  0.950032  8.888573e-01    1.000000e+00\n",
      "country[T.UK]         0.653008  2.910517  1.381527  3.845147e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.734270  1.925031  1.185283  4.808554e-01    1.000000e+00\n",
      "age_group             0.927628  1.414196  1.142924  2.097721e-01    1.000000e+00\n",
      "Intercept             0.023989  0.129079  0.057473  4.034370e-14    1.125589e-11\n",
      "Running logistic regression with parameter diabetes, signature DBS78D\n",
      "** Warning: Covariate country, sig DBS78D, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78D: 774\n",
      "All counts for signature DBS78D: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                           2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]        0.725087    4.389286   1.843213  1.796489e-01    1.000000e+00\n",
      "sex[T.Male]            0.354690    1.414114   0.709453  3.095800e-01    1.000000e+00\n",
      "country[T.Brazil]      0.062345    3.535561   0.633365  5.295075e-01    1.000000e+00\n",
      "country[T.Canada]      0.250767    6.967315   1.482227  6.194730e-01    1.000000e+00\n",
      "country[T.Japan]       0.159261    9.550107   1.650046  5.337761e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.238633   16.018934   2.585275  3.270272e-01    1.000000e+00\n",
      "country[T.Romania]    10.308166  104.923211  29.723038  5.896232e-12    1.645049e-09\n",
      "country[T.Russia]      0.271367    4.543475   1.108233  7.032891e-01    1.000000e+00\n",
      "country[T.Serbia]      3.187539   35.165913   9.749738  4.485516e-05    1.251459e-02\n",
      "country[T.Thailand]    0.011899   22.022654   1.718022  6.471514e-01    1.000000e+00\n",
      "country[T.UK]          0.155003    4.242481   0.910438  6.673347e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.310803    1.332582   0.650944  2.286711e-01    1.000000e+00\n",
      "age_group              1.144589    2.099434   1.535527  3.680727e-03    1.000000e+00\n",
      "Intercept              0.002737    0.043765   0.012144  9.657825e-16    2.694533e-13\n",
      "Running logistic regression with parameter diabetes, signature ID1\n",
      "Zero counts for signature ID1: 410\n",
      "All counts for signature ID1: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1129.6245 \n",
      "Link Function:         Logit             BIC:             -4306.0748\n",
      "Dependent Variable:    ID1_bool          Log-Likelihood:  -550.81   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -568.38   \n",
      "No. Observations:      820               Deviance:        1101.6    \n",
      "Df Model:              13                Pearson chi2:    820.      \n",
      "Df Residuals:          806               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.5194   0.2486 -2.0899 0.0366 -1.0066 -0.0323\n",
      "diabetes[T.Yes]      -0.1223   0.2027 -0.6037 0.5460 -0.5196  0.2749\n",
      "sex[T.Male]           0.1776   0.1521  1.1675 0.2430 -0.1206  0.4758\n",
      "country[T.Brazil]     0.0249   0.2611  0.0955 0.9239 -0.4868  0.5367\n",
      "country[T.Canada]     0.2809   0.2903  0.9676 0.3333 -0.2881  0.8500\n",
      "country[T.Japan]      0.0437   0.3752  0.1164 0.9073 -0.6918  0.7791\n",
      "country[T.Lithuania] -0.8219   0.5688 -1.4449 0.1485 -1.9367  0.2930\n",
      "country[T.Romania]   -0.2748   0.3333 -0.8244 0.4097 -0.9281  0.3785\n",
      "country[T.Russia]     0.3633   0.2211  1.6430 0.1004 -0.0701  0.7967\n",
      "country[T.Serbia]    -0.4507   0.3021 -1.4917 0.1358 -1.0428  0.1415\n",
      "country[T.Thailand]   1.2530   1.1360  1.1030 0.2700 -0.9735  3.4795\n",
      "country[T.UK]        -0.2044   0.2472 -0.8268 0.4083 -0.6889  0.2801\n",
      "tobacco_ever[T.Yes]  -0.2262   0.1511 -1.4967 0.1345 -0.5224  0.0700\n",
      "age_group             0.2586   0.0654  3.9554 0.0001  0.1304  0.3867\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "Intercept             0.365455   0.968229  0.594848  0.036630         1.00000\n",
      "diabetes[T.Yes]       0.594787   1.316343  0.884841  0.546036         1.00000\n",
      "sex[T.Male]           0.886428   1.609230  1.194348  0.243017         1.00000\n",
      "country[T.Brazil]     0.614602   1.710280  1.025252  0.923902         1.00000\n",
      "country[T.Canada]     0.749668   2.339573  1.324350  0.333256         1.00000\n",
      "country[T.Japan]      0.500691   2.179592  1.044654  0.907319         1.00000\n",
      "country[T.Lithuania]  0.144174   1.340446  0.439611  0.148498         1.00000\n",
      "country[T.Romania]    0.395296   1.460096  0.759718  0.409690         1.00000\n",
      "country[T.Russia]     0.932310   2.218111  1.438043  0.100388         1.00000\n",
      "country[T.Serbia]     0.352449   1.151962  0.637188  0.135770         1.00000\n",
      "country[T.Thailand]   0.377751  32.442689  3.500751  0.270034         1.00000\n",
      "country[T.UK]         0.502124   1.323277  0.815138  0.408327         1.00000\n",
      "tobacco_ever[T.Yes]   0.593069   1.072522  0.797546  0.134463         1.00000\n",
      "age_group             1.139326   1.472083  1.295061  0.000076         0.02132\n",
      "Running logistic regression with parameter diabetes, signature ID2\n",
      "** Warning: Covariate country, sig ID2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID2: 806\n",
      "All counts for signature ID2: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.071686   2.948547  0.668095  0.471464        1.000000\n",
      "sex[T.Male]           0.380106   3.549303  1.110289  0.557144        1.000000\n",
      "country[T.Brazil]     0.001309   1.684668  0.174792  0.119600        1.000000\n",
      "country[T.Canada]     0.002013   2.594341  0.268708  0.297231        1.000000\n",
      "country[T.Japan]      0.460558  13.221726  2.758246  0.185998        1.000000\n",
      "country[T.Lithuania]  0.009383  13.451316  1.278278  0.513516        1.000000\n",
      "country[T.Romania]    0.100894   5.758705  1.027225  0.524938        1.000000\n",
      "country[T.Russia]     0.166978   2.756244  0.677304  0.421969        1.000000\n",
      "country[T.Serbia]     0.001766   2.282567  0.235862  0.188145        1.000000\n",
      "country[T.Thailand]   0.024362  45.476996  3.487368  0.331773        1.000000\n",
      "country[T.UK]         0.252258   4.821554  1.149503  0.511051        1.000000\n",
      "tobacco_ever[T.Yes]   0.232173   2.134012  0.717785  0.407604        1.000000\n",
      "age_group             0.544921   1.359847  0.858197  0.392092        1.000000\n",
      "Intercept             0.009202   0.205596  0.049957  0.000004        0.001213\n",
      "Running logistic regression with parameter diabetes, signature ID3\n",
      "** Warning: Covariate country, sig ID3, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID3: 784\n",
      "All counts for signature ID3: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.207147   1.744123  0.672534  4.093203e-01    1.000000e+00\n",
      "sex[T.Male]           0.482875   2.029568  0.975889  7.604944e-01    1.000000e+00\n",
      "country[T.Brazil]     0.196168   2.865027  0.826219  6.640838e-01    1.000000e+00\n",
      "country[T.Canada]     0.129143   2.744216  0.707301  6.303623e-01    1.000000e+00\n",
      "country[T.Japan]      0.248786   5.491133  1.382515  5.931428e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.005204   6.183105  0.688255  6.512401e-01    1.000000e+00\n",
      "country[T.Romania]    1.752526  14.029358  4.870778  2.505053e-03    6.989097e-01\n",
      "country[T.Russia]     0.243046   2.282354  0.753330  5.562642e-01    1.000000e+00\n",
      "country[T.Serbia]     0.049623   2.280577  0.479483  3.548764e-01    1.000000e+00\n",
      "country[T.Thailand]   0.718157  54.366355  8.026624  7.527177e-02    1.000000e+00\n",
      "country[T.UK]         0.297649   3.013865  0.976803  7.213417e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.747344   3.171681  1.523528  2.332997e-01    1.000000e+00\n",
      "age_group             0.809678   1.489897  1.093342  5.236299e-01    1.000000e+00\n",
      "Intercept             0.010328   0.106540  0.035553  2.242430e-11    6.256381e-09\n",
      "Running logistic regression with parameter diabetes, signature ID5\n",
      "Zero counts for signature ID5: 410\n",
      "All counts for signature ID5: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             967.1363  \n",
      "Link Function:         Logit             BIC:             -4468.5630\n",
      "Dependent Variable:    ID5_bool          Log-Likelihood:  -469.57   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -568.38   \n",
      "No. Observations:      820               Deviance:        939.14    \n",
      "Df Model:              13                Pearson chi2:    830.      \n",
      "Df Residuals:          806               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.5563   0.2885 -5.3948 0.0000 -2.1218 -0.9909\n",
      "diabetes[T.Yes]       0.2827   0.2232  1.2668 0.2052 -0.1547  0.7201\n",
      "sex[T.Male]           0.5568   0.1704  3.2680 0.0011  0.2229  0.8907\n",
      "country[T.Brazil]    -1.0413   0.2943 -3.5377 0.0004 -1.6182 -0.4644\n",
      "country[T.Canada]    -0.8597   0.3193 -2.6926 0.0071 -1.4856 -0.2339\n",
      "country[T.Japan]     -2.9058   0.5183 -5.6060 0.0000 -3.9218 -1.8899\n",
      "country[T.Lithuania] -0.2011   0.5784 -0.3477 0.7281 -1.3346  0.9325\n",
      "country[T.Romania]    0.6374   0.4015  1.5873 0.1124 -0.1496  1.4244\n",
      "country[T.Russia]    -0.4391   0.2420 -1.8144 0.0696 -0.9135  0.0352\n",
      "country[T.Serbia]    -0.0891   0.3243 -0.2747 0.7835 -0.7248  0.5466\n",
      "country[T.Thailand]  -2.4689   1.1765 -2.0985 0.0359 -4.7748 -0.1629\n",
      "country[T.UK]        -0.4206   0.2730 -1.5409 0.1233 -0.9556  0.1144\n",
      "tobacco_ever[T.Yes]  -0.0050   0.1671 -0.0298 0.9762 -0.3325  0.3225\n",
      "age_group             0.8238   0.0812 10.1508 0.0000  0.6647  0.9828\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.119820  0.371239  0.210907  6.860316e-08    1.914028e-05\n",
      "diabetes[T.Yes]       0.856679  2.054668  1.326722  2.052256e-01    1.000000e+00\n",
      "sex[T.Male]           1.249643  2.436884  1.745060  1.083110e-03    3.021876e-01\n",
      "country[T.Brazil]     0.198248  0.628508  0.352988  4.036292e-04    1.126126e-01\n",
      "country[T.Canada]     0.226376  0.791419  0.423271  7.089884e-03    1.000000e+00\n",
      "country[T.Japan]      0.019806  0.151086  0.054703  2.070357e-08    5.776296e-06\n",
      "country[T.Lithuania]  0.263254  2.540849  0.817856  7.281012e-01    1.000000e+00\n",
      "country[T.Romania]    0.861021  4.155196  1.891484  1.124437e-01    1.000000e+00\n",
      "country[T.Russia]     0.401117  1.035861  0.644594  6.961781e-02    1.000000e+00\n",
      "country[T.Serbia]     0.484427  1.727362  0.914757  7.835441e-01    1.000000e+00\n",
      "country[T.Thailand]   0.008439  0.849640  0.084678  3.586467e-02    1.000000e+00\n",
      "country[T.UK]         0.384591  1.121181  0.656655  1.233392e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.717155  1.380581  0.995033  9.762259e-01    1.000000e+00\n",
      "age_group             1.943950  2.672035  2.279101  3.285638e-24    9.166930e-22\n",
      "Running logistic regression with parameter diabetes, signature ID8\n",
      "Zero counts for signature ID8: 225\n",
      "All counts for signature ID8: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             898.8015  \n",
      "Link Function:         Logit             BIC:             -4536.8978\n",
      "Dependent Variable:    ID8_bool          Log-Likelihood:  -435.40   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -481.81   \n",
      "No. Observations:      820               Deviance:        870.80    \n",
      "Df Model:              13                Pearson chi2:    811.      \n",
      "Df Residuals:          806               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.0118   0.2880 -0.0409 0.9673 -0.5762  0.5526\n",
      "diabetes[T.Yes]       0.2804   0.2592  1.0818 0.2793 -0.2276  0.7885\n",
      "sex[T.Male]           0.4139   0.1746  2.3703 0.0178  0.0717  0.7562\n",
      "country[T.Brazil]    -0.8158   0.3059 -2.6672 0.0076 -1.4153 -0.2163\n",
      "country[T.Canada]    -0.2859   0.3630 -0.7876 0.4309 -0.9975  0.4256\n",
      "country[T.Japan]     -1.5820   0.4203 -3.7642 0.0002 -2.4057 -0.7583\n",
      "country[T.Lithuania]  0.3511   0.7986  0.4396 0.6602 -1.2142  1.9164\n",
      "country[T.Romania]   -0.8475   0.3816 -2.2212 0.0263 -1.5954 -0.0997\n",
      "country[T.Russia]    -0.0919   0.2706 -0.3398 0.7340 -0.6222  0.4383\n",
      "country[T.Serbia]    -0.7234   0.3353 -2.1578 0.0309 -1.3805 -0.0663\n",
      "country[T.Thailand]  -1.3666   0.9644 -1.4170 0.1565 -3.2569  0.5236\n",
      "country[T.UK]        -0.2943   0.3117 -0.9443 0.3450 -0.9052  0.3166\n",
      "tobacco_ever[T.Yes]   0.1081   0.1749  0.6182 0.5365 -0.2347  0.4510\n",
      "age_group             0.5676   0.0780  7.2762 0.0000  0.4147  0.7204\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.562032  1.737798  0.988280  9.673450e-01    1.000000e+00\n",
      "diabetes[T.Yes]       0.796415  2.200078  1.323697  2.793356e-01    1.000000e+00\n",
      "sex[T.Male]           1.074294  2.130231  1.512777  1.777279e-02    1.000000e+00\n",
      "country[T.Brazil]     0.242857  0.805479  0.442285  7.648638e-03    1.000000e+00\n",
      "country[T.Canada]     0.368813  1.530540  0.751321  4.309415e-01    1.000000e+00\n",
      "country[T.Japan]      0.090203  0.468474  0.205567  1.670589e-04    4.660944e-02\n",
      "country[T.Lithuania]  0.296951  6.796439  1.420636  6.602038e-01    1.000000e+00\n",
      "country[T.Romania]    0.202827  0.905112  0.428464  2.633426e-02    1.000000e+00\n",
      "country[T.Russia]     0.536752  1.550132  0.912160  7.339936e-01    1.000000e+00\n",
      "country[T.Serbia]     0.251453  0.935839  0.485098  3.094681e-02    1.000000e+00\n",
      "country[T.Thailand]   0.038508  1.688100  0.254962  1.564688e-01    1.000000e+00\n",
      "country[T.UK]         0.404478  1.372389  0.745051  3.450236e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.790813  1.569806  1.114192  5.364511e-01    1.000000e+00\n",
      "age_group             1.513883  2.055335  1.763955  3.432693e-13    9.577213e-11\n",
      "Running logistic regression with parameter diabetes, signature ID9\n",
      "** Warning: Covariate country, sig ID9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID9: 816\n",
      "All counts for signature ID9: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.503913  22.581744  3.373242  0.174536        1.000000\n",
      "sex[T.Male]           0.173043  10.422961  1.059719  0.632931        1.000000\n",
      "country[T.Brazil]     0.002667   4.668423  0.370513  0.371277        1.000000\n",
      "country[T.Canada]     0.113791   9.786422  1.268966  0.735434        1.000000\n",
      "country[T.Japan]      0.004956  10.173785  0.720492  0.538864        1.000000\n",
      "country[T.Lithuania]  0.011591  24.766279  1.706477  0.461213        1.000000\n",
      "country[T.Romania]    0.006687  12.708657  0.942985  0.530601        1.000000\n",
      "country[T.Russia]     0.001811   4.017085  0.264391  0.299113        1.000000\n",
      "country[T.Serbia]     0.163992  17.047957  1.954808  0.416620        1.000000\n",
      "country[T.Thailand]   0.028583  78.828752  4.357052  0.346895        1.000000\n",
      "country[T.UK]         0.002039   3.590245  0.283857  0.256509        1.000000\n",
      "tobacco_ever[T.Yes]   0.282455  18.927538  1.811625  0.438659        1.000000\n",
      "age_group             0.476623   2.920582  1.110637  0.640395        1.000000\n",
      "Intercept             0.000134   0.095071  0.006449  0.000006        0.001678\n",
      "Running logistic regression with parameter diabetes, signature ID11\n",
      "** Warning: Covariate country, sig ID11, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID11: 813\n",
      "All counts for signature ID11: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.480320  13.476830  2.874782  0.194958        1.000000\n",
      "sex[T.Male]           0.139460   2.794634  0.646894  0.485888        1.000000\n",
      "country[T.Brazil]     0.002223   4.075975  0.312331  0.312002        1.000000\n",
      "country[T.Canada]     0.152527  13.562844  1.719999  0.543404        1.000000\n",
      "country[T.Japan]      0.009596  18.124087  1.353961  0.543013        1.000000\n",
      "country[T.Lithuania]  0.006649  14.934485  0.985321  0.501639        1.000000\n",
      "country[T.Romania]    0.004674   8.565045  0.655568  0.538073        1.000000\n",
      "country[T.Russia]     0.187505   7.440373  1.090734  0.594037        1.000000\n",
      "country[T.Serbia]     0.003651   6.716717  0.512622  0.445355        1.000000\n",
      "country[T.Thailand]   0.016328  42.894914  2.529347  0.488490        1.000000\n",
      "country[T.UK]         0.097583   8.496477  1.089609  0.519290        1.000000\n",
      "tobacco_ever[T.Yes]   0.024104   1.304412  0.241850  0.093246        1.000000\n",
      "age_group             0.562215   2.134961  1.063078  0.607629        1.000000\n",
      "Intercept             0.001665   0.185614  0.024116  0.000043        0.012049\n",
      "Running logistic regression with parameter diabetes, signature ID12\n",
      "** Warning: Covariate country, sig ID12, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter diabetes, sig ID12, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID12: 813\n",
      "All counts for signature ID12: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%        97.5%         OR   p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.005563     7.147951   0.735331  0.575952        1.000000\n",
      "sex[T.Male]           0.132180     2.493890   0.594495  0.331272        1.000000\n",
      "country[T.Brazil]     0.798927  1188.465250   8.719486  0.063372        1.000000\n",
      "country[T.Canada]     0.014126   498.943290   2.653393  0.589565        1.000000\n",
      "country[T.Japan]      0.029294  1042.002851   5.520877  0.263369        1.000000\n",
      "country[T.Lithuania]  0.071236  2848.210370  14.244761  0.173540        1.000000\n",
      "country[T.Romania]    0.015099   537.683093   2.850994  0.274084        1.000000\n",
      "country[T.Russia]     0.339417   492.046331   3.627467  0.242742        1.000000\n",
      "country[T.Serbia]     0.008684   302.287814   1.620641  0.377125        1.000000\n",
      "country[T.Thailand]   0.194055  9320.478161  42.225321  0.074547        1.000000\n",
      "country[T.UK]         0.256799   725.353279   4.921405  0.221051        1.000000\n",
      "tobacco_ever[T.Yes]   0.122915     2.980717   0.690450  0.422315        1.000000\n",
      "age_group             0.209228     0.954849   0.478547  0.029961        1.000000\n",
      "Intercept             0.000150     0.211651   0.021020  0.000089        0.024743\n",
      "Running logistic regression with parameter diabetes, signature ID83C\n",
      "** Warning: Covariate country, sig ID83C, perfect or near-perfect separation for category  Czechia. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID83C: 805\n",
      "All counts for signature ID83C: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%         97.5%          OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]        0.718945     15.904308    3.431770  1.065373e-01    1.000000e+00\n",
      "sex[T.Male]            0.245286      2.651453    0.802674  5.925770e-01    1.000000e+00\n",
      "country[T.Brazil]      0.010429    360.925660    1.940160  4.333172e-01    1.000000e+00\n",
      "country[T.Canada]      0.394663   1118.486937    7.585232  1.673776e-01    1.000000e+00\n",
      "country[T.Japan]       0.022949    829.906031    4.366617  3.196231e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.033208   1243.002571    6.437173  2.241451e-01    1.000000e+00\n",
      "country[T.Romania]    15.357086  18128.917517  135.493319  6.583715e-09    1.836857e-06\n",
      "country[T.Russia]      0.007438    269.497026    1.414636  5.314963e-01    1.000000e+00\n",
      "country[T.Serbia]      2.459944   3807.483408   27.565685  4.549406e-03    1.000000e+00\n",
      "country[T.Thailand]    0.065674   2960.497535   14.085560  1.912407e-01    1.000000e+00\n",
      "country[T.UK]          0.009106    316.128450    1.696257  4.381825e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.223630      2.487246    0.771407  5.565785e-01    1.000000e+00\n",
      "age_group              0.994819      2.816612    1.620014  4.793407e-02    1.000000e+00\n",
      "Intercept              0.000005      0.011670    0.000815  6.759792e-14    1.885982e-11\n",
      "Running logistic regression with parameter diabetes, signature SBS_burden\n",
      "** Warning: Covariate country, sig SBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS_burden: 418\n",
      "All counts for signature SBS_burden: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%         OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.876704   2.149464   1.368719  1.667316e-01    1.000000e+00\n",
      "sex[T.Male]           1.520593   3.073309   2.154248  1.334932e-05    3.724461e-03\n",
      "country[T.Brazil]     0.354416   1.154406   0.641446  1.378933e-01    1.000000e+00\n",
      "country[T.Canada]     0.194880   0.721289   0.377573  3.099544e-03    8.647728e-01\n",
      "country[T.Japan]      0.203433   1.139862   0.481025  9.551016e-02    1.000000e+00\n",
      "country[T.Lithuania]  0.303215   2.868720   0.918303  8.341889e-01    1.000000e+00\n",
      "country[T.Romania]    4.114558  31.270843  10.608653  1.550698e-07    4.326446e-05\n",
      "country[T.Russia]     0.551392   1.473259   0.901034  6.756530e-01    1.000000e+00\n",
      "country[T.Serbia]     0.963052   3.635990   1.861485  6.462455e-02    1.000000e+00\n",
      "country[T.Thailand]   0.000268   0.400363   0.037147  3.734553e-03    1.000000e+00\n",
      "country[T.UK]         0.512262   1.536167   0.887179  6.440835e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.858449   1.699760   1.207011  2.791318e-01    1.000000e+00\n",
      "age_group             2.424435   3.461838   2.882126  2.296415e-42    6.406999e-40\n",
      "Intercept             0.034670   0.122803   0.066221  4.285275e-20    1.195592e-17\n",
      "Running logistic regression with parameter diabetes, signature DBS_burden\n",
      "** Warning: Covariate country, sig DBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS_burden: 458\n",
      "All counts for signature DBS_burden: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "diabetes[T.Yes]       0.741247  1.823415  1.159996  5.142989e-01    1.000000e+00\n",
      "sex[T.Male]           1.117403  2.226589  1.573965  9.363192e-03    1.000000e+00\n",
      "country[T.Brazil]     0.312324  0.978643  0.554801  4.175398e-02    1.000000e+00\n",
      "country[T.Canada]     0.195358  0.692197  0.370332  1.786718e-03    4.984943e-01\n",
      "country[T.Japan]      0.076079  0.421453  0.183537  4.927617e-05    1.374805e-02\n",
      "country[T.Lithuania]  0.438037  4.334562  1.307725  6.284171e-01    1.000000e+00\n",
      "country[T.Romania]    1.448489  9.707371  3.564972  4.971457e-03    1.000000e+00\n",
      "country[T.Russia]     0.411473  1.106246  0.675541  1.190060e-01    1.000000e+00\n",
      "country[T.Serbia]     0.627335  2.232244  1.179936  6.078607e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000294  0.392770  0.039760  3.056754e-03    8.528343e-01\n",
      "country[T.UK]         0.513337  1.665111  0.922904  7.632971e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.807994  1.592904  1.133757  4.675783e-01    1.000000e+00\n",
      "age_group             1.752802  2.414691  2.050049  1.334519e-21    3.723308e-19\n",
      "Intercept             0.125136  0.395048  0.224325  1.416281e-07    3.951424e-05\n",
      "Running logistic regression with parameter diabetes, signature ID_burden\n",
      "Zero counts for signature ID_burden: 408\n",
      "All counts for signature ID_burden: 820\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             926.1849  \n",
      "Link Function:         Logit             BIC:             -4509.5144\n",
      "Dependent Variable:    ID_burden_bool    Log-Likelihood:  -449.09   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -568.37   \n",
      "No. Observations:      820               Deviance:        898.18    \n",
      "Df Model:              13                Pearson chi2:    809.      \n",
      "Df Residuals:          806               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.9427   0.3029 -6.4142 0.0000 -2.5363 -1.3491\n",
      "diabetes[T.Yes]       0.1716   0.2271  0.7559 0.4497 -0.2734  0.6167\n",
      "sex[T.Male]           0.7835   0.1776  4.4109 0.0000  0.4354  1.1317\n",
      "country[T.Brazil]    -0.9020   0.2990 -3.0169 0.0026 -1.4880 -0.3160\n",
      "country[T.Canada]    -1.1534   0.3303 -3.4920 0.0005 -1.8009 -0.5060\n",
      "country[T.Japan]     -2.8386   0.5053 -5.6179 0.0000 -3.8290 -1.8483\n",
      "country[T.Lithuania] -0.7219   0.5733 -1.2593 0.2079 -1.8455  0.4016\n",
      "country[T.Romania]    1.4983   0.4698  3.1893 0.0014  0.5775  2.4190\n",
      "country[T.Russia]    -0.3267   0.2476 -1.3191 0.1871 -0.8120  0.1587\n",
      "country[T.Serbia]    -0.1517   0.3326 -0.4562 0.6482 -0.8035  0.5001\n",
      "country[T.Thailand]  -2.4548   1.1821 -2.0766 0.0378 -4.7717 -0.1378\n",
      "country[T.UK]        -0.3605   0.2798 -1.2886 0.1975 -0.9089  0.1878\n",
      "tobacco_ever[T.Yes]  -0.0662   0.1717 -0.3859 0.6996 -0.4027  0.2702\n",
      "age_group             0.9550   0.0867 11.0105 0.0000  0.7850  1.1249\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.079156   0.259478  0.143315  1.415238e-10    3.948513e-08\n",
      "diabetes[T.Yes]       0.760787   1.852760  1.187247  4.497121e-01    1.000000e+00\n",
      "sex[T.Male]           1.545509   3.100770  2.189125  1.029583e-05    2.872535e-03\n",
      "country[T.Brazil]     0.225822   0.729054  0.405754  2.553755e-03    7.124976e-01\n",
      "country[T.Canada]     0.165158   0.602875  0.315547  4.794638e-04    1.337704e-01\n",
      "country[T.Japan]      0.021732   0.157504  0.058505  1.932364e-08    5.391296e-06\n",
      "country[T.Lithuania]  0.157944   1.494281  0.485811  2.079082e-01    1.000000e+00\n",
      "country[T.Romania]    1.781629  11.234603  4.473913  1.425953e-03    3.978408e-01\n",
      "country[T.Russia]     0.443974   1.171968  0.721335  1.871228e-01    1.000000e+00\n",
      "country[T.Serbia]     0.447743   1.648883  0.859229  6.482400e-01    1.000000e+00\n",
      "country[T.Thailand]   0.008466   0.871237  0.085885  3.784130e-02    1.000000e+00\n",
      "country[T.UK]         0.402958   1.206625  0.697294  1.975212e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.668506   1.310266  0.935907  6.996071e-01    1.000000e+00\n",
      "age_group             2.192324   3.080038  2.598546  3.401357e-28    9.489785e-26\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using below/above median model for signature SBS1, its frequency is 0.77\n",
      "Using below/above median model for signature SBS1536A, its frequency is 0.88\n",
      "Using below/above median model for signature SBS1536B, its frequency is 0.90\n",
      "Using below/above median model for signature ID1, its frequency is 0.86\n",
      "Using below/above median model for signature ID5, its frequency is 0.94\n",
      "Running logistic regression with parameter PFOA_q, signature SBS1\n",
      "Zero counts for signature SBS1: 453\n",
      "All counts for signature SBS1: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                   Results: Generalized linear model\n",
      "=======================================================================\n",
      "Model:                  GLM                AIC:              1191.4810 \n",
      "Link Function:          Logit              BIC:              -4887.9653\n",
      "Dependent Variable:     SBS1_bool          Log-Likelihood:   -578.74   \n",
      "Date:                   2024-02-08 17:23   LL-Null:          -627.30   \n",
      "No. Observations:       905                Deviance:         1157.5    \n",
      "Df Model:               16                 Pearson chi2:     902.      \n",
      "Df Residuals:           888                Scale:            1.0000    \n",
      "Method:                 IRLS                                           \n",
      "-----------------------------------------------------------------------\n",
      "                         Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "-----------------------------------------------------------------------\n",
      "Intercept               -0.5939   0.3398 -1.7476 0.0805 -1.2599  0.0722\n",
      "sex[T.Male]             -0.0254   0.1489 -0.1708 0.8644 -0.3173  0.2664\n",
      "country[T.Brazil]       -0.0734   0.4317 -0.1700 0.8650 -0.9194  0.7726\n",
      "country[T.Canada]        0.6470   0.4115  1.5722 0.1159 -0.1596  1.4536\n",
      "country[T.Lithuania]     0.3656   0.6371  0.5738 0.5661 -0.8831  1.6143\n",
      "country[T.Poland]       -0.6685   0.6490 -1.0300 0.3030 -1.9405  0.6036\n",
      "country[T.Romania]      -1.1540   0.3453 -3.3419 0.0008 -1.8308 -0.4772\n",
      "country[T.Russia]        0.3969   0.2325  1.7075 0.0877 -0.0587  0.8526\n",
      "country[T.Serbia]       -0.8611   0.3506 -2.4563 0.0140 -1.5483 -0.1740\n",
      "country[T.Thailand]      0.0677   0.9902  0.0684 0.9455 -1.8731  2.0085\n",
      "country[T.UK]            0.4724   0.2847  1.6594 0.0970 -0.0856  1.0303\n",
      "tobacco_ever[T.Yes]     -0.0118   0.1488 -0.0793 0.9368 -0.3035  0.2799\n",
      "rec_period[T.2005-2010] -0.4034   0.2478 -1.6282 0.1035 -0.8891  0.0822\n",
      "rec_period[T.2010-2015] -0.1416   0.2602 -0.5442 0.5863 -0.6517  0.3684\n",
      "rec_period[T.2015+]     -0.2143   0.3945 -0.5431 0.5870 -0.9876  0.5590\n",
      "PFOA_q                  -0.1013   0.0879 -1.1526 0.2491 -0.2736  0.0710\n",
      "age_group                0.4657   0.0664  7.0162 0.0000  0.3356  0.5958\n",
      "=======================================================================\n",
      "\n",
      "                             2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept                0.283670  1.074826  0.552174  8.052882e-02    1.000000e+00\n",
      "sex[T.Male]              0.728112  1.305307  0.974890  8.644006e-01    1.000000e+00\n",
      "country[T.Brazil]        0.398759  2.165475  0.929248  8.650121e-01    1.000000e+00\n",
      "country[T.Canada]        0.852500  4.278605  1.909846  1.159050e-01    1.000000e+00\n",
      "country[T.Lithuania]     0.413480  5.024373  1.441347  5.661020e-01    1.000000e+00\n",
      "country[T.Poland]        0.143626  1.828601  0.512479  3.030030e-01    1.000000e+00\n",
      "country[T.Romania]       0.160278  0.620518  0.315365  8.322126e-04    2.321873e-01\n",
      "country[T.Russia]        0.942995  2.345626  1.487250  8.773355e-02    1.000000e+00\n",
      "country[T.Serbia]        0.212611  0.840296  0.422678  1.403872e-02    1.000000e+00\n",
      "country[T.Thailand]      0.153652  7.452124  1.070063  9.454781e-01    1.000000e+00\n",
      "country[T.UK]            0.917985  2.801903  1.603778  9.704497e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]      0.738232  1.322998  0.988271  9.368137e-01    1.000000e+00\n",
      "rec_period[T.2005-2010]  0.411031  1.085666  0.668014  1.034734e-01    1.000000e+00\n",
      "rec_period[T.2010-2015]  0.521176  1.445443  0.867946  5.862809e-01    1.000000e+00\n",
      "rec_period[T.2015+]      0.372471  1.748924  0.807108  5.870271e-01    1.000000e+00\n",
      "PFOA_q                   0.760598  1.073566  0.903633  2.490910e-01    1.000000e+00\n",
      "age_group                1.398766  1.814410  1.593090  2.279233e-12    6.359061e-10\n",
      "Running logistic regression with parameter PFOA_q, signature SBS2\n",
      "** Warning: Covariate country, sig SBS2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Covariate rec_period, sig SBS2, perfect or near-perfect separation for category 2010-2015. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS2: 900\n",
      "All counts for signature SBS2: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%        97.5%         OR   p-value  p-value (corr)\n",
      "sex[T.Male]              0.213417     6.699032   1.111157  0.303455             1.0\n",
      "country[T.Brazil]        0.003044    52.081881   0.732724  0.246960             1.0\n",
      "country[T.Canada]        0.007422   101.484538   1.642609  0.465056             1.0\n",
      "country[T.Lithuania]     0.025750   342.379392   5.727517  0.120968             1.0\n",
      "country[T.Poland]        0.023248    82.951013   3.589306  0.202950             1.0\n",
      "country[T.Romania]       0.008183    34.661960   1.340922  0.269621             1.0\n",
      "country[T.Russia]        0.247117    39.749052   2.732932  0.268360             1.0\n",
      "country[T.Serbia]        0.032089  1492.352168   8.166051  0.245123             1.0\n",
      "country[T.Thailand]      0.069879  1096.798035  16.515488  0.137412             1.0\n",
      "country[T.UK]            0.576976   172.135219   8.226190  0.086819             1.0\n",
      "tobacco_ever[T.Yes]      0.107047     3.281546   0.635608  0.403288             1.0\n",
      "rec_period[T.2005-2010]  0.057848     8.099039   0.621513  0.412082             1.0\n",
      "rec_period[T.2010-2015]  0.000111     1.464336   0.048512  0.060222             1.0\n",
      "rec_period[T.2015+]      0.010306     9.288242   0.333130  0.365199             1.0\n",
      "PFOA_q                   0.326257     2.072746   0.770655  0.431534             1.0\n",
      "age_group                0.418402     1.911187   0.903292  0.511873             1.0\n",
      "Intercept                0.000690     0.683795   0.034754  0.020095             1.0\n",
      "Running logistic regression with parameter PFOA_q, signature SBS4\n",
      "Zero counts for signature SBS4: 394\n",
      "All counts for signature SBS4: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                   Results: Generalized linear model\n",
      "=======================================================================\n",
      "Model:                  GLM                AIC:              1196.5960 \n",
      "Link Function:          Logit              BIC:              -4882.8502\n",
      "Dependent Variable:     SBS4_bool          Log-Likelihood:   -581.30   \n",
      "Date:                   2024-02-08 17:23   LL-Null:          -619.71   \n",
      "No. Observations:       905                Deviance:         1162.6    \n",
      "Df Model:               16                 Pearson chi2:     904.      \n",
      "Df Residuals:           888                Scale:            1.0000    \n",
      "Method:                 IRLS                                           \n",
      "-----------------------------------------------------------------------\n",
      "                         Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "-----------------------------------------------------------------------\n",
      "Intercept               -0.3748   0.3390 -1.1057 0.2689 -1.0391  0.2896\n",
      "sex[T.Male]             -0.2359   0.1490 -1.5829 0.1135 -0.5281  0.0562\n",
      "country[T.Brazil]       -0.5834   0.4345 -1.3426 0.1794 -1.4351  0.2683\n",
      "country[T.Canada]       -0.6151   0.4071 -1.5107 0.1309 -1.4131  0.1829\n",
      "country[T.Lithuania]    -0.2111   0.6372 -0.3313 0.7404 -1.4600  1.0379\n",
      "country[T.Poland]        0.5989   0.7079  0.8460 0.3975 -0.7885  1.9863\n",
      "country[T.Romania]      -0.9558   0.3220 -2.9687 0.0030 -1.5868 -0.3248\n",
      "country[T.Russia]       -0.4052   0.2335 -1.7353 0.0827 -0.8628  0.0525\n",
      "country[T.Serbia]       -0.7215   0.3324 -2.1703 0.0300 -1.3731 -0.0699\n",
      "country[T.Thailand]     -0.3814   0.9900 -0.3852 0.7001 -2.3218  1.5591\n",
      "country[T.UK]           -0.1869   0.2925 -0.6390 0.5228 -0.7603  0.3864\n",
      "tobacco_ever[T.Yes]      0.6972   0.1493  4.6693 0.0000  0.4045  0.9899\n",
      "rec_period[T.2005-2010] -0.1606   0.2512 -0.6394 0.5226 -0.6530  0.3317\n",
      "rec_period[T.2010-2015]  0.2043   0.2630  0.7766 0.4374 -0.3113  0.7198\n",
      "rec_period[T.2015+]      0.0351   0.3980  0.0882 0.9297 -0.7451  0.8153\n",
      "PFOA_q                   0.0777   0.0870  0.8931 0.3718 -0.0929  0.2483\n",
      "age_group                0.3331   0.0647  5.1492 0.0000  0.2063  0.4599\n",
      "=======================================================================\n",
      "\n",
      "                             2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept                0.353770  1.335836  0.687444  2.688606e-01        1.000000\n",
      "sex[T.Male]              0.589752  1.057817  0.789842  1.134543e-01        1.000000\n",
      "country[T.Brazil]        0.238102  1.307682  0.557998  1.793982e-01        1.000000\n",
      "country[T.Canada]        0.243399  1.200708  0.540602  1.308635e-01        1.000000\n",
      "country[T.Lithuania]     0.232226  2.823154  0.809697  7.404406e-01        1.000000\n",
      "country[T.Poland]        0.454510  7.288210  1.820045  3.975494e-01        1.000000\n",
      "country[T.Romania]       0.204582  0.722708  0.384517  2.991104e-03        0.834518\n",
      "country[T.Russia]        0.421986  1.053868  0.666872  8.269606e-02        1.000000\n",
      "country[T.Serbia]        0.253329  0.932463  0.486024  2.998381e-02        1.000000\n",
      "country[T.Thailand]      0.098094  4.754408  0.682919  7.000791e-01        1.000000\n",
      "country[T.UK]            0.467540  1.471701  0.829505  5.228217e-01        1.000000\n",
      "tobacco_ever[T.Yes]      1.498624  2.690863  2.008132  3.022679e-06        0.000843\n",
      "rec_period[T.2005-2010]  0.520482  1.393397  0.851609  5.225601e-01        1.000000\n",
      "rec_period[T.2010-2015]  0.732524  2.053954  1.226610  4.374138e-01        1.000000\n",
      "rec_period[T.2015+]      0.474707  2.259752  1.035723  9.297343e-01        1.000000\n",
      "PFOA_q                   0.911319  1.281875  1.080832  3.718262e-01        1.000000\n",
      "age_group                1.229134  1.583900  1.395287  2.616418e-07        0.000073\n",
      "Running logistic regression with parameter PFOA_q, signature SBS5\n",
      "Zero counts for signature SBS5: 835\n",
      "All counts for signature SBS5: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                   Results: Generalized linear model\n",
      "=======================================================================\n",
      "Model:                  GLM                AIC:              475.9998  \n",
      "Link Function:          Logit              BIC:              -5603.4465\n",
      "Dependent Variable:     SBS5_bool          Log-Likelihood:   -221.00   \n",
      "Date:                   2024-02-08 17:23   LL-Null:          -246.38   \n",
      "No. Observations:       905                Deviance:         442.00    \n",
      "Df Model:               16                 Pearson chi2:     940.      \n",
      "Df Residuals:           888                Scale:            1.0000    \n",
      "Method:                 IRLS                                           \n",
      "-----------------------------------------------------------------------\n",
      "                         Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "-----------------------------------------------------------------------\n",
      "Intercept               -3.9266   0.6802 -5.7726 0.0000 -5.2599 -2.5934\n",
      "sex[T.Male]             -0.0278   0.2772 -0.1004 0.9200 -0.5711  0.5155\n",
      "country[T.Brazil]       -0.0381   0.8277 -0.0460 0.9633 -1.6604  1.5843\n",
      "country[T.Canada]       -1.1195   0.8320 -1.3456 0.1784 -2.7501  0.5111\n",
      "country[T.Lithuania]     0.0839   0.9731  0.0862 0.9313 -1.8233  1.9910\n",
      "country[T.Poland]       -0.6099   1.1112 -0.5489 0.5831 -2.7878  1.5679\n",
      "country[T.Romania]      -1.0690   0.7959 -1.3430 0.1793 -2.6290  0.4910\n",
      "country[T.Russia]       -0.8328   0.5278 -1.5777 0.1146 -1.8673  0.2017\n",
      "country[T.Serbia]       -0.1081   0.6755 -0.1601 0.8728 -1.4320  1.2158\n",
      "country[T.Thailand]     -0.0390   1.3568 -0.0287 0.9771 -2.6983  2.6203\n",
      "country[T.UK]           -0.9853   0.6071 -1.6231 0.1046 -2.1752  0.2045\n",
      "tobacco_ever[T.Yes]      0.4691   0.2807  1.6709 0.0947 -0.0812  1.0193\n",
      "rec_period[T.2005-2010] -0.4102   0.4245 -0.9664 0.3339 -1.2422  0.4218\n",
      "rec_period[T.2010-2015] -0.3449   0.4875 -0.7076 0.4792 -1.3003  0.6105\n",
      "rec_period[T.2015+]      0.4671   0.7758  0.6021 0.5471 -1.0535  1.9876\n",
      "PFOA_q                   0.2102   0.1750  1.2007 0.2299 -0.1329  0.5532\n",
      "age_group                0.5891   0.1308  4.5052 0.0000  0.3328  0.8453\n",
      "=======================================================================\n",
      "\n",
      "                             2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept                0.005196   0.074763  0.019710  7.807581e-09        0.000002\n",
      "sex[T.Male]              0.564880   1.674467  0.972560  9.200498e-01        1.000000\n",
      "country[T.Brazil]        0.190057   4.875713  0.962633  9.633040e-01        1.000000\n",
      "country[T.Canada]        0.063921   1.667153  0.326445  1.784283e-01        1.000000\n",
      "country[T.Lithuania]     0.161489   7.323089  1.087472  9.313258e-01        1.000000\n",
      "country[T.Poland]        0.061559   4.796515  0.543385  5.830599e-01        1.000000\n",
      "country[T.Romania]       0.072150   1.634022  0.343357  1.792611e-01        1.000000\n",
      "country[T.Russia]        0.154548   1.223533  0.434850  1.146250e-01        1.000000\n",
      "country[T.Serbia]        0.238828   3.372846  0.897513  8.728181e-01        1.000000\n",
      "country[T.Thailand]      0.067323  13.739511  0.961762  9.770752e-01        1.000000\n",
      "country[T.UK]            0.113587   1.226942  0.373315  1.045754e-01        1.000000\n",
      "tobacco_ever[T.Yes]      0.922052   2.771339  1.598537  9.474242e-02        1.000000\n",
      "rec_period[T.2005-2010]  0.288743   1.524674  0.663505  3.338619e-01        1.000000\n",
      "rec_period[T.2010-2015]  0.272441   1.841281  0.708266  4.791788e-01        1.000000\n",
      "rec_period[T.2015+]      0.348722   7.298321  1.595331  5.471373e-01        1.000000\n",
      "PFOA_q                   0.875560   1.738841  1.233880  2.298607e-01        1.000000\n",
      "age_group                1.394861   2.328749  1.802299  6.630651e-06        0.001850\n",
      "Running logistic regression with parameter PFOA_q, signature SBS12\n",
      "** Warning: Covariate country, sig SBS12, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS12: 886\n",
      "All counts for signature SBS12: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%        97.5%         OR   p-value  p-value (corr)\n",
      "sex[T.Male]              0.372354     2.449123   0.936356  0.842006        1.000000\n",
      "country[T.Brazil]        2.255979   515.345197  32.119069  0.009990        1.000000\n",
      "country[T.Canada]        4.227616   288.972144  33.624257  0.001080        0.301286\n",
      "country[T.Lithuania]     0.092727   433.647299  16.538437  0.161846        1.000000\n",
      "country[T.Poland]        0.013178    26.204984   1.881581  0.573104        1.000000\n",
      "country[T.Romania]       0.611699    24.964566   4.272462  0.126694        1.000000\n",
      "country[T.Russia]        0.168396     6.178504   1.136312  0.742551        1.000000\n",
      "country[T.Serbia]        0.205994    26.158195   2.735683  0.369991        1.000000\n",
      "country[T.Thailand]      0.197007  1090.156574  36.897856  0.109536        1.000000\n",
      "country[T.UK]            0.294513    10.215320   1.884385  0.434046        1.000000\n",
      "tobacco_ever[T.Yes]      0.254637     1.735496   0.676619  0.389332        1.000000\n",
      "rec_period[T.2005-2010]  0.302578     8.304379   1.402898  0.565262        1.000000\n",
      "rec_period[T.2010-2015]  0.122292     5.221431   0.753114  0.669698        1.000000\n",
      "rec_period[T.2015+]      0.011504     1.989975   0.149319  0.140667        1.000000\n",
      "PFOA_q                   0.833492     2.682276   1.483561  0.174683        1.000000\n",
      "age_group                0.703809     1.575821   1.045339  0.748185        1.000000\n",
      "Intercept                0.000681     0.074893   0.008497  0.000002        0.000568\n",
      "Running logistic regression with parameter PFOA_q, signature SBS13\n",
      "** Warning: Covariate country, sig SBS13, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS13: 756\n",
      "All counts for signature SBS13: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                             2.5%     97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]              1.110259  2.450714  1.638977  0.012386        1.000000\n",
      "country[T.Brazil]        0.179866  1.498021  0.521713  0.222141        1.000000\n",
      "country[T.Canada]        0.227846  1.688533  0.632895  0.363632        1.000000\n",
      "country[T.Lithuania]     0.211491  4.051795  1.013782  0.854538        1.000000\n",
      "country[T.Poland]        0.001644  1.775330  0.214402  0.182747        1.000000\n",
      "country[T.Romania]       0.031762  0.518270  0.161111  0.001011        0.281958\n",
      "country[T.Russia]        0.666727  2.051399  1.169195  0.573882        1.000000\n",
      "country[T.Serbia]        0.212339  1.292652  0.544813  0.170504        1.000000\n",
      "country[T.Thailand]      0.002986  4.501433  0.412687  0.515563        1.000000\n",
      "country[T.UK]            0.575486  2.268904  1.154971  0.662451        1.000000\n",
      "tobacco_ever[T.Yes]      0.813831  1.727394  1.184221  0.373993        1.000000\n",
      "rec_period[T.2005-2010]  0.593866  2.180557  1.122364  0.699569        1.000000\n",
      "rec_period[T.2010-2015]  0.583096  2.307775  1.145648  0.671367        1.000000\n",
      "rec_period[T.2015+]      0.452254  3.122719  1.188468  0.714914        1.000000\n",
      "PFOA_q                   0.675718  1.044489  0.840151  0.115996        1.000000\n",
      "age_group                0.906564  1.257201  1.066811  0.431220        1.000000\n",
      "Intercept                0.066938  0.390886  0.165137  0.000029        0.008155\n",
      "Running logistic regression with parameter PFOA_q, signature SBS18\n",
      "** Warning: Covariate country, sig SBS18, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS18: 837\n",
      "All counts for signature SBS18: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]              1.111623   3.427765  1.917221  0.017606             1.0\n",
      "country[T.Brazil]        0.192590   4.278175  0.886962  0.736091             1.0\n",
      "country[T.Canada]        0.171101   3.790421  0.848704  0.800021             1.0\n",
      "country[T.Lithuania]     0.319967  15.168413  2.415318  0.330986             1.0\n",
      "country[T.Poland]        0.091231   4.326248  0.894716  0.715961             1.0\n",
      "country[T.Romania]       0.233124   2.053024  0.740906  0.523148             1.0\n",
      "country[T.Russia]        0.251165   1.378265  0.596276  0.218800             1.0\n",
      "country[T.Serbia]        0.253011   2.717091  0.869691  0.734333             1.0\n",
      "country[T.Thailand]      0.010267  21.357883  1.513826  0.693583             1.0\n",
      "country[T.UK]            0.480796   3.713325  1.390721  0.492411             1.0\n",
      "tobacco_ever[T.Yes]      0.394432   1.113509  0.665458  0.117391             1.0\n",
      "rec_period[T.2005-2010]  0.205142   1.029860  0.462666  0.056468             1.0\n",
      "rec_period[T.2010-2015]  0.172067   0.958341  0.408232  0.037651             1.0\n",
      "rec_period[T.2015+]      0.083021   1.382957  0.353643  0.133054             1.0\n",
      "PFOA_q                   0.539645   0.997538  0.733204  0.046974             1.0\n",
      "age_group                0.725226   1.133728  0.906795  0.375576             1.0\n",
      "Intercept                0.102490   0.902208  0.312678  0.030407             1.0\n",
      "Running logistic regression with parameter PFOA_q, signature SBS21\n",
      "** Warning: Covariate country, sig SBS21, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Covariate rec_period, sig SBS21, perfect or near-perfect separation for category 2015+. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter PFOA_q, sig SBS21, perfect or near-perfect separation for category 0. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS21: 900\n",
      "All counts for signature SBS21: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%          97.5%          OR   p-value  p-value (corr)\n",
      "sex[T.Male]              0.187111       5.357276    0.932718  0.619840        1.000000\n",
      "country[T.Brazil]        0.066115   31746.679770   33.591476  0.137632        1.000000\n",
      "country[T.Canada]        0.065454    6663.809950   22.365813  0.202732        1.000000\n",
      "country[T.Lithuania]     0.305561  102477.712183  126.301444  0.046442        1.000000\n",
      "country[T.Poland]        0.036541    1417.743606    7.163867  0.196825        1.000000\n",
      "country[T.Romania]       2.628318    7651.088766   48.625913  0.006496        1.000000\n",
      "country[T.Russia]        0.295953    1146.392248    7.239045  0.146562        1.000000\n",
      "country[T.Serbia]        0.034680    1926.289524    8.095103  0.256172        1.000000\n",
      "country[T.Thailand]      0.949728  340175.683317  411.957462  0.032848        1.000000\n",
      "country[T.UK]            1.168860    2907.197227   18.512353  0.029160        1.000000\n",
      "tobacco_ever[T.Yes]      0.259241       7.679972    1.315090  0.448103        1.000000\n",
      "rec_period[T.2005-2010]  0.060694       9.106882    0.675337  0.445087        1.000000\n",
      "rec_period[T.2010-2015]  0.047755       6.552936    0.494442  0.369942        1.000000\n",
      "rec_period[T.2015+]      0.000502       7.228372    0.151660  0.270256        1.000000\n",
      "PFOA_q                   0.609441       5.495509    1.698706  0.251506        1.000000\n",
      "age_group                0.330386       1.550582    0.729692  0.307851        1.000000\n",
      "Intercept                0.000005       0.082932    0.001667  0.000280        0.078188\n",
      "Running logistic regression with parameter PFOA_q, signature SBS22\n",
      "** Warning: Covariate country, sig SBS22, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS22: 836\n",
      "All counts for signature SBS22: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                               2.5%         97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]                0.302684      1.323701    0.637664  1.797254e-01    1.000000e+00\n",
      "country[T.Brazil]          0.401277   1021.089071   18.154947  1.137986e-01    1.000000e+00\n",
      "country[T.Canada]          0.395931    607.964404   17.196559  1.205424e-01    1.000000e+00\n",
      "country[T.Lithuania]       0.032859    912.511848   10.108200  1.508288e-01    1.000000e+00\n",
      "country[T.Poland]          0.110080    536.274970   18.698738  1.375146e-01    1.000000e+00\n",
      "country[T.Romania]       146.978006  10308.552961  861.212159  1.315025e-28    3.668919e-26\n",
      "country[T.Russia]          0.004452     13.686703    0.672185  6.693718e-01    1.000000e+00\n",
      "country[T.Serbia]         25.559197   2335.818375  178.629798  5.966709e-10    1.664712e-07\n",
      "country[T.Thailand]        7.673030  27502.417177  401.605651  3.562321e-03    9.938876e-01\n",
      "country[T.UK]              0.242523     43.801375    3.413997  2.811374e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]        0.424516      1.963066    0.909472  7.242986e-01    1.000000e+00\n",
      "rec_period[T.2005-2010]    0.792481     21.175458    3.788560  8.741254e-02    1.000000e+00\n",
      "rec_period[T.2010-2015]    0.223935      3.247676    0.874110  7.145966e-01    1.000000e+00\n",
      "rec_period[T.2015+]        0.031509     29.527161    0.855543  7.721347e-01    1.000000e+00\n",
      "PFOA_q                     0.643742      1.657385    1.030315  7.695290e-01    1.000000e+00\n",
      "age_group                  1.387346      2.764598    1.925025  4.846809e-05    1.352260e-02\n",
      "Intercept                  0.000036      0.008082    0.000724  4.222237e-13    1.178004e-10\n",
      "Running logistic regression with parameter PFOA_q, signature SBS44\n",
      "** Warning: Covariate country, sig SBS44, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter PFOA_q, sig SBS44, perfect or near-perfect separation for category 0. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS44: 899\n",
      "All counts for signature SBS44: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%          97.5%          OR   p-value  p-value (corr)\n",
      "sex[T.Male]              0.271591       6.584327    1.197309  0.605699        1.000000\n",
      "country[T.Brazil]        1.415737  111040.091683  158.539491  0.020137        1.000000\n",
      "country[T.Canada]        0.070847    6453.188406   22.386342  0.196255        1.000000\n",
      "country[T.Lithuania]     0.376420  116100.462325  150.343362  0.039204        1.000000\n",
      "country[T.Poland]        0.036596    1456.146862    7.241614  0.179575        1.000000\n",
      "country[T.Romania]       3.593825    9834.788396   63.698255  0.003427        0.956016\n",
      "country[T.Russia]        0.395982    1385.163491    8.954684  0.109216        1.000000\n",
      "country[T.Serbia]        0.043388    2332.080781    9.967098  0.229289        1.000000\n",
      "country[T.Thailand]      1.280013  419658.427850  536.728607  0.026178        1.000000\n",
      "country[T.UK]            1.087057    2555.324245   16.616115  0.033378        1.000000\n",
      "tobacco_ever[T.Yes]      0.402322      10.074510    1.810146  0.309469        1.000000\n",
      "rec_period[T.2005-2010]  0.071275      10.730935    0.790685  0.458371        1.000000\n",
      "rec_period[T.2010-2015]  0.055332       7.718127    0.577988  0.420106        1.000000\n",
      "rec_period[T.2015+]      0.000630       8.265592    0.182566  0.302778        1.000000\n",
      "PFOA_q                   0.759475       5.987624    2.093314  0.126405        1.000000\n",
      "age_group                0.370452       1.540770    0.765358  0.333665        1.000000\n",
      "Intercept                0.000002       0.031047    0.000559  0.000020        0.005516\n",
      "Running logistic regression with parameter PFOA_q, signature SBS1536A\n",
      "** Warning: Covariate country, sig SBS1536A, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536A: 453\n",
      "All counts for signature SBS1536A: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                             2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]              1.417288  2.641795  1.930850  2.763735e-05    7.710820e-03\n",
      "country[T.Brazil]        0.178859  1.070351  0.437453  6.997443e-02    1.000000e+00\n",
      "country[T.Canada]        0.177877  0.940788  0.409603  3.543080e-02    1.000000e+00\n",
      "country[T.Lithuania]     0.494565  7.536091  1.811993  3.768805e-01    1.000000e+00\n",
      "country[T.Poland]        0.231809  2.877413  0.790885  7.113729e-01    1.000000e+00\n",
      "country[T.Romania]       0.160560  0.612108  0.314732  6.613079e-04    1.845049e-01\n",
      "country[T.Russia]        0.167756  0.447313  0.275418  1.370195e-07    3.822845e-05\n",
      "country[T.Serbia]        0.217022  0.857585  0.432915  1.634212e-02    1.000000e+00\n",
      "country[T.Thailand]      0.000327  0.471961  0.044816  6.491222e-03    1.000000e+00\n",
      "country[T.UK]            0.288086  0.939621  0.520951  3.025014e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]      0.783127  1.447566  1.064701  6.842914e-01    1.000000e+00\n",
      "rec_period[T.2005-2010]  0.332856  0.962440  0.567914  3.514586e-02    1.000000e+00\n",
      "rec_period[T.2010-2015]  0.439500  1.325843  0.764517  3.390282e-01    1.000000e+00\n",
      "rec_period[T.2015+]      0.183011  0.950960  0.419748  3.739917e-02    1.000000e+00\n",
      "PFOA_q                   0.756054  1.084281  0.906418  2.831919e-01    1.000000e+00\n",
      "age_group                1.788748  2.385526  2.059964  1.526727e-26    4.259568e-24\n",
      "Intercept                0.288422  1.199379  0.587553  1.436107e-01    1.000000e+00\n",
      "Running logistic regression with parameter PFOA_q, signature SBS1536B\n",
      "Zero counts for signature SBS1536B: 453\n",
      "All counts for signature SBS1536B: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                  Results: Generalized linear model\n",
      "======================================================================\n",
      "Model:                 GLM                AIC:              1185.4109 \n",
      "Link Function:         Logit              BIC:              -4894.0353\n",
      "Dependent Variable:    SBS1536B_bool      Log-Likelihood:   -575.71   \n",
      "Date:                  2024-02-08 17:23   LL-Null:          -627.30   \n",
      "No. Observations:      905                Deviance:         1151.4    \n",
      "Df Model:              16                 Pearson chi2:     904.      \n",
      "Df Residuals:          888                Scale:            1.0000    \n",
      "Method:                IRLS                                           \n",
      "----------------------------------------------------------------------\n",
      "                         Coef.  Std.Err.    z    P>|z|   [0.025 0.975]\n",
      "----------------------------------------------------------------------\n",
      "Intercept               -0.6663   0.3425 -1.9454 0.0517 -1.3376 0.0050\n",
      "sex[T.Male]             -0.1108   0.1494 -0.7416 0.4583 -0.4035 0.1820\n",
      "country[T.Brazil]       -0.7863   0.4434 -1.7731 0.0762 -1.6554 0.0829\n",
      "country[T.Canada]       -0.4429   0.4103 -1.0794 0.2804 -1.2471 0.3613\n",
      "country[T.Lithuania]    -0.1181   0.6297 -0.1875 0.8513 -1.3522 1.1160\n",
      "country[T.Poland]       -0.4569   0.6335 -0.7212 0.4708 -1.6985 0.7848\n",
      "country[T.Romania]       0.1166   0.3255  0.3583 0.7201 -0.5213 0.7545\n",
      "country[T.Russia]        0.2962   0.2346  1.2627 0.2067 -0.1636 0.7559\n",
      "country[T.Serbia]       -0.0301   0.3360 -0.0896 0.9286 -0.6887 0.6285\n",
      "country[T.Thailand]     -0.6903   0.9920 -0.6959 0.4865 -2.6347 1.2540\n",
      "country[T.UK]            0.4251   0.2893  1.4692 0.1418 -0.1420 0.9922\n",
      "tobacco_ever[T.Yes]      0.2874   0.1501  1.9145 0.0556 -0.0068 0.5816\n",
      "rec_period[T.2005-2010] -0.4816   0.2491 -1.9337 0.0532 -0.9697 0.0065\n",
      "rec_period[T.2010-2015] -0.1812   0.2616 -0.6926 0.4885 -0.6939 0.3315\n",
      "rec_period[T.2015+]     -0.4796   0.3978 -1.2057 0.2279 -1.2592 0.3000\n",
      "PFOA_q                  -0.1374   0.0882 -1.5590 0.1190 -0.3102 0.0354\n",
      "age_group                0.5637   0.0682  8.2633 0.0000  0.4300 0.6974\n",
      "======================================================================\n",
      "\n",
      "                             2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept                0.262467  1.004992  0.513593  5.172380e-02    1.000000e+00\n",
      "sex[T.Male]              0.667968  1.199585  0.895145  4.583191e-01    1.000000e+00\n",
      "country[T.Brazil]        0.191015  1.086394  0.455541  7.621217e-02    1.000000e+00\n",
      "country[T.Canada]        0.287347  1.435218  0.642188  2.804238e-01    1.000000e+00\n",
      "country[T.Lithuania]     0.258675  3.052725  0.888631  8.512528e-01    1.000000e+00\n",
      "country[T.Poland]        0.182956  2.191869  0.633259  4.707879e-01    1.000000e+00\n",
      "country[T.Romania]       0.593763  2.126597  1.123697  7.200960e-01    1.000000e+00\n",
      "country[T.Russia]        0.849112  2.129561  1.344707  2.067123e-01    1.000000e+00\n",
      "country[T.Serbia]        0.502230  1.874785  0.970347  9.286209e-01    1.000000e+00\n",
      "country[T.Thailand]      0.071741  3.504462  0.501412  4.865130e-01    1.000000e+00\n",
      "country[T.UK]            0.867630  2.697159  1.529751  1.417712e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]      0.993196  1.788964  1.332964  5.555802e-02    1.000000e+00\n",
      "rec_period[T.2005-2010]  0.379185  1.006564  0.617798  5.315073e-02    1.000000e+00\n",
      "rec_period[T.2010-2015]  0.499610  1.393116  0.834275  4.885481e-01    1.000000e+00\n",
      "rec_period[T.2015+]      0.283872  1.349881  0.619026  2.279224e-01    1.000000e+00\n",
      "PFOA_q                   0.733291  1.035982  0.871594  1.190045e-01    1.000000e+00\n",
      "age_group                1.537258  2.008542  1.757171  1.416785e-16    3.952829e-14\n",
      "Running logistic regression with parameter PFOA_q, signature SBS1536F\n",
      "** Warning: Covariate country, sig SBS1536F, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536F: 795\n",
      "All counts for signature SBS1536F: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]              0.656891   1.536785  1.001183  0.918431        1.000000\n",
      "country[T.Brazil]        0.612399   6.756923  2.010957  0.248703        1.000000\n",
      "country[T.Canada]        1.372797  11.264298  3.908289  0.010713        1.000000\n",
      "country[T.Lithuania]     0.676352  15.662668  3.497797  0.125312        1.000000\n",
      "country[T.Poland]        0.002875   3.244198  0.377453  0.440073        1.000000\n",
      "country[T.Romania]       0.058172   1.014761  0.301590  0.052733        1.000000\n",
      "country[T.Russia]        0.653165   2.516550  1.277540  0.469641        1.000000\n",
      "country[T.Serbia]        0.181600   1.611474  0.577489  0.300036        1.000000\n",
      "country[T.Thailand]      0.359459  30.882543  4.248168  0.211198        1.000000\n",
      "country[T.UK]            0.778013   3.795751  1.733533  0.173915        1.000000\n",
      "tobacco_ever[T.Yes]      0.469234   1.111214  0.724105  0.139034        1.000000\n",
      "rec_period[T.2005-2010]  0.411952   2.030906  0.897029  0.763628        1.000000\n",
      "rec_period[T.2010-2015]  0.527070   2.671366  1.159969  0.697656        1.000000\n",
      "rec_period[T.2015+]      0.175329   1.776434  0.562164  0.323884        1.000000\n",
      "PFOA_q                   0.699006   1.146780  0.895083  0.377345        1.000000\n",
      "age_group                0.854202   1.230677  1.024500  0.783339        1.000000\n",
      "Intercept                0.057932   0.426903  0.162238  0.000164        0.045877\n",
      "Running logistic regression with parameter PFOA_q, signature SBS1536I\n",
      "** Warning: Covariate country, sig SBS1536I, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536I: 813\n",
      "All counts for signature SBS1536I: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                              2.5%          97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]               0.457748       1.771514    0.900944  4.496470e-01    1.000000e+00\n",
      "country[T.Brazil]         0.016320    4053.657160    1.236228  4.636160e-01    1.000000e+00\n",
      "country[T.Canada]         0.000422      50.658201    0.220644  6.018319e-01    1.000000e+00\n",
      "country[T.Lithuania]      0.025153    8927.258856    2.520341  3.271814e-01    1.000000e+00\n",
      "country[T.Poland]         0.017360      31.577231    2.475080  3.691683e-01    1.000000e+00\n",
      "country[T.Romania]       59.053695     817.051861  198.231126  5.091451e-27    1.420515e-24\n",
      "country[T.Russia]         0.054629       3.244765    0.566004  4.028550e-01    1.000000e+00\n",
      "country[T.Serbia]        25.146153     498.970889  101.301128  2.220740e-14    6.195865e-12\n",
      "country[T.Thailand]       0.368270  117435.050030   32.515351  1.188474e-01    1.000000e+00\n",
      "country[T.UK]             0.000552       3.121936    0.237528  2.937876e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]       0.537643       2.157897    1.072205  5.726421e-01    1.000000e+00\n",
      "rec_period[T.2005-2010]   0.387029       5.160012    1.371864  4.323592e-01    1.000000e+00\n",
      "rec_period[T.2010-2015]   0.202363       2.599856    0.740967  4.551182e-01    1.000000e+00\n",
      "rec_period[T.2015+]       0.000529     101.112106    1.572797  6.028834e-01    1.000000e+00\n",
      "PFOA_q                    0.645995       1.510748    0.985797  5.899659e-01    1.000000e+00\n",
      "age_group                 1.516064       2.885704    2.061644  1.225284e-06    3.418543e-04\n",
      "Intercept                 0.000492       0.023216    0.003829  4.453902e-12    1.242639e-09\n",
      "Running logistic regression with parameter PFOA_q, signature DBS2\n",
      "Zero counts for signature DBS2: 521\n",
      "All counts for signature DBS2: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                   Results: Generalized linear model\n",
      "=======================================================================\n",
      "Model:                  GLM                AIC:              1173.9502 \n",
      "Link Function:          Logit              BIC:              -4905.4960\n",
      "Dependent Variable:     DBS2_bool          Log-Likelihood:   -569.98   \n",
      "Date:                   2024-02-08 17:23   LL-Null:          -616.89   \n",
      "No. Observations:       905                Deviance:         1140.0    \n",
      "Df Model:               16                 Pearson chi2:     910.      \n",
      "Df Residuals:           888                Scale:            1.0000    \n",
      "Method:                 IRLS                                           \n",
      "-----------------------------------------------------------------------\n",
      "                         Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "-----------------------------------------------------------------------\n",
      "Intercept               -0.6946   0.3449 -2.0138 0.0440 -1.3705 -0.0186\n",
      "sex[T.Male]              0.2746   0.1510  1.8187 0.0690 -0.0213  0.5705\n",
      "country[T.Brazil]       -1.2475   0.4420 -2.8224 0.0048 -2.1138 -0.3812\n",
      "country[T.Canada]       -1.2911   0.4167 -3.0981 0.0019 -2.1078 -0.4743\n",
      "country[T.Lithuania]    -0.9532   0.6308 -1.5112 0.1307 -2.1894  0.2831\n",
      "country[T.Poland]       -0.7042   0.6237 -1.1291 0.2589 -1.9266  0.5182\n",
      "country[T.Romania]       0.0798   0.3252  0.2453 0.8062 -0.5576  0.7172\n",
      "country[T.Russia]       -0.5226   0.2420 -2.1596 0.0308 -0.9969 -0.0483\n",
      "country[T.Serbia]       -0.1877   0.3327 -0.5641 0.5727 -0.8397  0.4643\n",
      "country[T.Thailand]     -0.9454   0.9970 -0.9482 0.3430 -2.8996  1.0088\n",
      "country[T.UK]           -0.5450   0.2953 -1.8453 0.0650 -1.1239  0.0339\n",
      "tobacco_ever[T.Yes]      0.8184   0.1521  5.3794 0.0000  0.5202  1.1166\n",
      "rec_period[T.2005-2010] -0.9739   0.2539 -3.8358 0.0001 -1.4715 -0.4763\n",
      "rec_period[T.2010-2015] -0.0465   0.2612 -0.1782 0.8586 -0.5585  0.4654\n",
      "rec_period[T.2015+]      0.3066   0.3980  0.7705 0.4410 -0.4734  1.0866\n",
      "PFOA_q                  -0.1119   0.0893 -1.2530 0.2102 -0.2870  0.0631\n",
      "age_group                0.3059   0.0662  4.6178 0.0000  0.1760  0.4357\n",
      "=======================================================================\n",
      "\n",
      "                             2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept                0.253972  0.981600  0.499298  4.402946e-02        1.000000\n",
      "sex[T.Male]              0.978890  1.769185  1.315993  6.896461e-02        1.000000\n",
      "country[T.Brazil]        0.120780  0.683034  0.287223  4.765840e-03        1.000000\n",
      "country[T.Canada]        0.121503  0.622323  0.274980  1.947516e-03        0.543357\n",
      "country[T.Lithuania]     0.111980  1.327212  0.385514  1.307455e-01        1.000000\n",
      "country[T.Poland]        0.145648  1.679006  0.494515  2.588644e-01        1.000000\n",
      "country[T.Romania]       0.572557  2.048730  1.083058  8.062001e-01        1.000000\n",
      "country[T.Russia]        0.369040  0.952830  0.592986  3.079996e-02        1.000000\n",
      "country[T.Serbia]        0.431855  1.590944  0.828890  5.726524e-01        1.000000\n",
      "country[T.Thailand]      0.055047  2.742292  0.388531  3.430365e-01        1.000000\n",
      "country[T.UK]            0.325015  1.034446  0.579837  6.499395e-02        1.000000\n",
      "tobacco_ever[T.Yes]      1.682396  3.054417  2.266879  7.475223e-08        0.000021\n",
      "rec_period[T.2005-2010]  0.229588  0.621108  0.377622  1.251570e-04        0.034919\n",
      "rec_period[T.2010-2015]  0.572069  1.592665  0.954523  8.585763e-01        1.000000\n",
      "rec_period[T.2015+]      0.622892  2.964298  1.358837  4.410164e-01        1.000000\n",
      "PFOA_q                   0.750546  1.065174  0.894127  2.102002e-01        1.000000\n",
      "age_group                1.192494  1.546033  1.357806  3.878399e-06        0.001082\n",
      "Running logistic regression with parameter PFOA_q, signature DBS4\n",
      "** Warning: Covariate country, sig DBS4, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS4: 819\n",
      "All counts for signature DBS4: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                             2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]              0.615452   1.592351  0.985308  8.081102e-01        1.000000\n",
      "country[T.Brazil]        0.190734   3.252916  0.787328  7.031041e-01        1.000000\n",
      "country[T.Canada]        0.219331   2.882112  0.816763  7.336319e-01        1.000000\n",
      "country[T.Lithuania]     0.592861  13.355219  2.853171  1.786323e-01        1.000000\n",
      "country[T.Poland]        0.073788   3.365889  0.715395  6.335707e-01        1.000000\n",
      "country[T.Romania]       0.190741   1.824298  0.651492  4.056808e-01        1.000000\n",
      "country[T.Russia]        0.475251   2.032794  0.990954  8.880343e-01        1.000000\n",
      "country[T.Serbia]        0.060702   1.176136  0.326010  8.899779e-02        1.000000\n",
      "country[T.Thailand]      0.002945   5.502524  0.426217  5.375023e-01        1.000000\n",
      "country[T.UK]            0.165077   1.209494  0.473063  1.195979e-01        1.000000\n",
      "tobacco_ever[T.Yes]      0.588275   1.535971  0.951255  8.076105e-01        1.000000\n",
      "rec_period[T.2005-2010]  0.320088   1.405528  0.669103  2.763082e-01        1.000000\n",
      "rec_period[T.2010-2015]  0.448075   2.084969  0.959832  8.560453e-01        1.000000\n",
      "rec_period[T.2015+]      0.324382   4.060001  1.162112  7.856004e-01        1.000000\n",
      "PFOA_q                   0.933308   1.652735  1.238380  1.377058e-01        1.000000\n",
      "age_group                1.047673   1.598542  1.290260  1.605569e-02        1.000000\n",
      "Intercept                0.020518   0.182298  0.063128  9.848353e-08        0.000027\n",
      "Running logistic regression with parameter PFOA_q, signature DBS9\n",
      "** Warning: Covariate country, sig DBS9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS9: 870\n",
      "All counts for signature DBS9: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]              0.743452   3.357510  1.538319  0.231273        1.000000\n",
      "country[T.Brazil]        0.139829  11.073589  1.218965  0.732842        1.000000\n",
      "country[T.Canada]        0.232862  10.110591  1.675885  0.578035        1.000000\n",
      "country[T.Lithuania]     0.177308  33.065654  2.849541  0.362184        1.000000\n",
      "country[T.Poland]        0.005566   7.334396  0.748144  0.673095        1.000000\n",
      "country[T.Romania]       0.000866   0.989318  0.114043  0.045571        1.000000\n",
      "country[T.Russia]        0.295720   2.511468  0.863882  0.704158        1.000000\n",
      "country[T.Serbia]        0.148680   2.868088  0.704675  0.583970        1.000000\n",
      "country[T.Thailand]      0.017492  59.675496  2.898142  0.494995        1.000000\n",
      "country[T.UK]            0.094222   2.336798  0.559495  0.412196        1.000000\n",
      "tobacco_ever[T.Yes]      0.463320   1.941990  0.947978  0.790258        1.000000\n",
      "rec_period[T.2005-2010]  0.182729   2.154875  0.619796  0.400537        1.000000\n",
      "rec_period[T.2010-2015]  0.326817   3.821034  1.079712  0.725783        1.000000\n",
      "rec_period[T.2015+]      0.056258   3.661434  0.472293  0.461417        1.000000\n",
      "PFOA_q                   0.543307   1.253177  0.824454  0.352582        1.000000\n",
      "age_group                0.842295   1.564649  1.143076  0.374935        1.000000\n",
      "Intercept                0.010253   0.255009  0.055778  0.000094        0.026328\n",
      "Running logistic regression with parameter PFOA_q, signature DBS78C\n",
      "** Warning: Covariate country, sig DBS78C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78C: 814\n",
      "All counts for signature DBS78C: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]              0.956274  2.529472  1.537781  7.484022e-02        1.000000\n",
      "country[T.Brazil]        0.141383  1.826571  0.513051  2.981037e-01        1.000000\n",
      "country[T.Canada]        0.315796  2.929090  0.976601  9.225101e-01        1.000000\n",
      "country[T.Lithuania]     0.043280  2.731751  0.468566  4.113542e-01        1.000000\n",
      "country[T.Poland]        0.002512  2.853358  0.330208  3.652357e-01        1.000000\n",
      "country[T.Romania]       0.200396  1.675396  0.623789  3.517519e-01        1.000000\n",
      "country[T.Russia]        0.299650  1.404273  0.654403  2.750804e-01        1.000000\n",
      "country[T.Serbia]        0.188997  1.545651  0.566558  2.703653e-01        1.000000\n",
      "country[T.Thailand]      0.003385  5.549478  0.475803  5.889874e-01        1.000000\n",
      "country[T.UK]            0.374587  2.112782  0.909140  7.971670e-01        1.000000\n",
      "tobacco_ever[T.Yes]      0.752871  1.899661  1.192839  4.503723e-01        1.000000\n",
      "rec_period[T.2005-2010]  0.275585  1.522363  0.643763  3.030164e-01        1.000000\n",
      "rec_period[T.2010-2015]  0.713034  3.675799  1.582107  2.584897e-01        1.000000\n",
      "rec_period[T.2015+]      0.526696  5.388465  1.671442  3.813777e-01        1.000000\n",
      "PFOA_q                   0.722292  1.251439  0.949533  7.050799e-01        1.000000\n",
      "age_group                0.978417  1.464822  1.194682  8.048653e-02        1.000000\n",
      "Intercept                0.021204  0.186884  0.065321  9.674048e-08        0.000027\n",
      "Running logistic regression with parameter PFOA_q, signature DBS78D\n",
      "** Warning: Covariate country, sig DBS78D, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78D: 850\n",
      "All counts for signature DBS78D: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                              2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]               0.396135    1.466976   0.762912  3.232524e-01    1.000000e+00\n",
      "country[T.Brazil]         0.013678    4.248815   0.271744  3.073936e-01    1.000000e+00\n",
      "country[T.Canada]         0.052497    7.950787   0.685000  6.739788e-01    1.000000e+00\n",
      "country[T.Lithuania]      0.068999   20.605737   1.349584  5.400216e-01    1.000000e+00\n",
      "country[T.Poland]         0.534540   39.975202   5.956732  1.004023e-01    1.000000e+00\n",
      "country[T.Romania]       14.975561  181.568315  47.075306  5.612585e-14    1.565911e-11\n",
      "country[T.Russia]         0.285546    6.231835   1.340416  6.228190e-01    1.000000e+00\n",
      "country[T.Serbia]         2.961342   53.230891  11.606990  2.877343e-04    8.027788e-02\n",
      "country[T.Thailand]       0.005246   24.441363   0.942980  6.664286e-01    1.000000e+00\n",
      "country[T.UK]             0.068379    5.296933   0.738367  6.745116e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]       0.386214    1.530120   0.771972  4.305409e-01    1.000000e+00\n",
      "rec_period[T.2005-2010]   0.137273    1.419620   0.450874  1.588864e-01    1.000000e+00\n",
      "rec_period[T.2010-2015]   0.240309    2.021428   0.699209  4.609826e-01    1.000000e+00\n",
      "rec_period[T.2015+]       0.234692   17.392366   2.096987  4.647172e-01    1.000000e+00\n",
      "PFOA_q                    0.647246    1.478039   0.976782  7.686391e-01    1.000000e+00\n",
      "age_group                 1.141753    2.055655   1.519842  3.675074e-03    1.000000e+00\n",
      "Intercept                 0.002566    0.073341   0.015057  1.561546e-08    4.356712e-06\n",
      "Running logistic regression with parameter PFOA_q, signature ID1\n",
      "Zero counts for signature ID1: 453\n",
      "All counts for signature ID1: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                   Results: Generalized linear model\n",
      "=======================================================================\n",
      "Model:                  GLM                AIC:              1206.7335 \n",
      "Link Function:          Logit              BIC:              -4872.7127\n",
      "Dependent Variable:     ID1_bool           Log-Likelihood:   -586.37   \n",
      "Date:                   2024-02-08 17:23   LL-Null:          -627.30   \n",
      "No. Observations:       905                Deviance:         1172.7    \n",
      "Df Model:               16                 Pearson chi2:     908.      \n",
      "Df Residuals:           888                Scale:            1.0000    \n",
      "Method:                 IRLS                                           \n",
      "-----------------------------------------------------------------------\n",
      "                         Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "-----------------------------------------------------------------------\n",
      "Intercept               -0.1555   0.3358 -0.4631 0.6433 -0.8136  0.5026\n",
      "sex[T.Male]              0.1436   0.1479  0.9704 0.3319 -0.1464  0.4335\n",
      "country[T.Brazil]       -1.2725   0.4331 -2.9381 0.0033 -2.1214 -0.4236\n",
      "country[T.Canada]       -0.6570   0.4074 -1.6126 0.1068 -1.4555  0.1415\n",
      "country[T.Lithuania]    -2.0955   0.6561 -3.1940 0.0014 -3.3814 -0.8096\n",
      "country[T.Poland]       -1.2403   0.7994 -1.5515 0.1208 -2.8071  0.3265\n",
      "country[T.Romania]      -0.9450   0.3266 -2.8936 0.0038 -1.5851 -0.3049\n",
      "country[T.Russia]       -0.3212   0.2342 -1.3715 0.1702 -0.7802  0.1378\n",
      "country[T.Serbia]       -1.2411   0.3372 -3.6803 0.0002 -1.9021 -0.5801\n",
      "country[T.Thailand]      0.0094   1.1787  0.0080 0.9936 -2.3008  2.3196\n",
      "country[T.UK]           -0.6900   0.2887 -2.3905 0.0168 -1.2558 -0.1243\n",
      "tobacco_ever[T.Yes]     -0.3361   0.1473 -2.2825 0.0225 -0.6248 -0.0475\n",
      "rec_period[T.2005-2010]  0.0104   0.2460  0.0424 0.9662 -0.4716  0.4925\n",
      "rec_period[T.2010-2015]  0.7808   0.2603  2.9999 0.0027  0.2707  1.2909\n",
      "rec_period[T.2015+]      1.0743   0.3938  2.7278 0.0064  0.3024  1.8462\n",
      "PFOA_q                  -0.2318   0.0872 -2.6562 0.0079 -0.4028 -0.0607\n",
      "age_group                0.2970   0.0642  4.6251 0.0000  0.1711  0.4228\n",
      "=======================================================================\n",
      "\n",
      "                             2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "Intercept                0.443253   1.653037  0.855987  0.643290        1.000000\n",
      "sex[T.Male]              0.863806   1.542708  1.154383  0.331857        1.000000\n",
      "country[T.Brazil]        0.119865   0.654657  0.280126  0.003302        0.921292\n",
      "country[T.Canada]        0.233292   1.152001  0.518413  0.106824        1.000000\n",
      "country[T.Lithuania]     0.033999   0.445025  0.123005  0.001403        0.391502\n",
      "country[T.Poland]        0.060377   1.386162  0.289297  0.120783        1.000000\n",
      "country[T.Romania]       0.204923   0.737191  0.388673  0.003809        1.000000\n",
      "country[T.Russia]        0.458335   1.147762  0.725300  0.170229        1.000000\n",
      "country[T.Serbia]        0.149258   0.559818  0.289063  0.000233        0.065005\n",
      "country[T.Thailand]      0.100184  10.171405  1.009459  0.993627        1.000000\n",
      "country[T.UK]            0.284844   0.883129  0.501552  0.016825        1.000000\n",
      "tobacco_ever[T.Yes]      0.535395   0.953614  0.714535  0.022460        1.000000\n",
      "rec_period[T.2005-2010]  0.623983   1.636365  1.010477  0.966199        1.000000\n",
      "rec_period[T.2010-2015]  1.310821   3.635961  2.183139  0.002701        0.753539\n",
      "rec_period[T.2015+]      1.353086   6.335408  2.927857  0.006376        1.000000\n",
      "PFOA_q                   0.668472   0.941061  0.793141  0.007902        1.000000\n",
      "age_group                1.186634   1.526247  1.345770  0.000004        0.001045\n",
      "Running logistic regression with parameter PFOA_q, signature ID2\n",
      "** Warning: Covariate country, sig ID2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID2: 891\n",
      "All counts for signature ID2: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%       97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]              0.330938    2.845118  0.948780  0.452097        1.000000\n",
      "country[T.Brazil]        0.001427    8.849904  0.257990  0.304796        1.000000\n",
      "country[T.Canada]        0.002622   10.096414  0.440034  0.545677        1.000000\n",
      "country[T.Lithuania]     0.009077   48.894666  1.574998  0.310116        1.000000\n",
      "country[T.Poland]        0.013452   26.534858  1.918628  0.363340        1.000000\n",
      "country[T.Romania]       0.291545   10.660059  1.940334  0.288409        1.000000\n",
      "country[T.Russia]        0.191586    4.189098  0.912848  0.534406        1.000000\n",
      "country[T.Serbia]        0.002894    5.585384  0.417304  0.386681        1.000000\n",
      "country[T.Thailand]      0.023076  142.334241  4.176995  0.310075        1.000000\n",
      "country[T.UK]            0.311966    8.255122  1.754867  0.361105        1.000000\n",
      "tobacco_ever[T.Yes]      0.187924    1.756786  0.599208  0.272867        1.000000\n",
      "rec_period[T.2005-2010]  0.413168    9.774989  1.723336  0.335760        1.000000\n",
      "rec_period[T.2010-2015]  0.151402    5.343073  0.818101  0.489929        1.000000\n",
      "rec_period[T.2015+]      0.052128   11.173749  0.922669  0.566887        1.000000\n",
      "PFOA_q                   0.542253    1.877663  0.983103  0.578158        1.000000\n",
      "age_group                0.580251    1.459445  0.917234  0.503667        1.000000\n",
      "Intercept                0.002825    0.290004  0.034997  0.000872        0.243248\n",
      "Running logistic regression with parameter PFOA_q, signature ID3\n",
      "** Warning: Covariate country, sig ID3, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID3: 869\n",
      "All counts for signature ID3: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                             2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "sex[T.Male]              0.468392    1.951593   0.943120  7.439903e-01        1.000000\n",
      "country[T.Brazil]        0.436462   62.422763   4.788154  1.808538e-01        1.000000\n",
      "country[T.Canada]        0.314186   21.176182   2.995363  3.111519e-01        1.000000\n",
      "country[T.Lithuania]     0.020628   80.101168   3.467098  4.121789e-01        1.000000\n",
      "country[T.Poland]        0.375075   27.566920   4.152898  1.885444e-01        1.000000\n",
      "country[T.Romania]       2.622227   25.070599   7.889541  2.145496e-04        0.059859\n",
      "country[T.Russia]        0.197834    2.796026   0.787596  6.014104e-01        1.000000\n",
      "country[T.Serbia]        0.077042    4.862468   0.824341  6.806974e-01        1.000000\n",
      "country[T.Thailand]      2.119369  695.344874  40.653921  1.494847e-02        1.000000\n",
      "country[T.UK]            0.501628    5.566160   1.732595  3.399258e-01        1.000000\n",
      "tobacco_ever[T.Yes]      0.903499    3.891788   1.841647  8.782251e-02        1.000000\n",
      "rec_period[T.2005-2010]  0.679310    7.070546   2.013464  1.951712e-01        1.000000\n",
      "rec_period[T.2010-2015]  0.398559    4.667899   1.270819  5.962563e-01        1.000000\n",
      "rec_period[T.2015+]      0.036781    3.294503   0.389164  3.747291e-01        1.000000\n",
      "PFOA_q                   0.709430    1.716057   1.095589  6.169587e-01        1.000000\n",
      "age_group                0.800981    1.494903   1.090135  5.224979e-01        1.000000\n",
      "Intercept                0.002040    0.067386   0.013023  1.139845e-08        0.000003\n",
      "Running logistic regression with parameter PFOA_q, signature ID5\n",
      "Zero counts for signature ID5: 453\n",
      "All counts for signature ID5: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                   Results: Generalized linear model\n",
      "=======================================================================\n",
      "Model:                  GLM                AIC:              1088.4244 \n",
      "Link Function:          Logit              BIC:              -4991.0218\n",
      "Dependent Variable:     ID5_bool           Log-Likelihood:   -527.21   \n",
      "Date:                   2024-02-08 17:23   LL-Null:          -627.30   \n",
      "No. Observations:       905                Deviance:         1054.4    \n",
      "Df Model:               16                 Pearson chi2:     907.      \n",
      "Df Residuals:           888                Scale:            1.0000    \n",
      "Method:                 IRLS                                           \n",
      "-----------------------------------------------------------------------\n",
      "                         Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "-----------------------------------------------------------------------\n",
      "Intercept               -1.8957   0.3706 -5.1148 0.0000 -2.6221 -1.1693\n",
      "sex[T.Male]              0.5314   0.1601  3.3198 0.0009  0.2177  0.8451\n",
      "country[T.Brazil]       -0.8526   0.4629 -1.8419 0.0655 -1.7598  0.0547\n",
      "country[T.Canada]       -0.7807   0.4327 -1.8045 0.0711 -1.6287  0.0672\n",
      "country[T.Lithuania]    -0.2797   0.6559 -0.4265 0.6698 -1.5654  1.0059\n",
      "country[T.Poland]       -0.7208   0.6866 -1.0499 0.2938 -2.0665  0.6248\n",
      "country[T.Romania]       0.3365   0.3594  0.9363 0.3491 -0.3679  1.0409\n",
      "country[T.Russia]       -0.6228   0.2501 -2.4907 0.0128 -1.1130 -0.1327\n",
      "country[T.Serbia]       -0.3445   0.3570 -0.9649 0.3346 -1.0442  0.3553\n",
      "country[T.Thailand]     -2.2864   1.2137 -1.8838 0.0596 -4.6652  0.0924\n",
      "country[T.UK]           -0.4967   0.3073 -1.6163 0.1060 -1.0991  0.1056\n",
      "tobacco_ever[T.Yes]     -0.0555   0.1581 -0.3508 0.7257 -0.3654  0.2545\n",
      "rec_period[T.2005-2010]  0.2144   0.2654  0.8081 0.4190 -0.3056  0.7345\n",
      "rec_period[T.2010-2015]  0.6219   0.2824  2.2018 0.0277  0.0683  1.1755\n",
      "rec_period[T.2015+]      0.2239   0.4206  0.5324 0.5944 -0.6004  1.0483\n",
      "PFOA_q                   0.0262   0.0919  0.2845 0.7761 -0.1540  0.2063\n",
      "age_group                0.8379   0.0761 11.0045 0.0000  0.6887  0.9872\n",
      "=======================================================================\n",
      "\n",
      "                             2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept                0.072649  0.310591  0.150214  3.140470e-07    8.761911e-05\n",
      "sex[T.Male]              1.243169  2.328264  1.701301  9.009069e-04    2.513530e-01\n",
      "country[T.Brazil]        0.172084  1.056190  0.426325  6.549584e-02    1.000000e+00\n",
      "country[T.Canada]        0.196181  1.069558  0.458069  7.114707e-02    1.000000e+00\n",
      "country[T.Lithuania]     0.209010  2.734337  0.755978  6.697639e-01    1.000000e+00\n",
      "country[T.Poland]        0.126634  1.867923  0.486356  2.937705e-01    1.000000e+00\n",
      "country[T.Romania]       0.692195  2.831792  1.400055  3.491027e-01    1.000000e+00\n",
      "country[T.Russia]        0.328588  0.875718  0.536424  1.275018e-02    1.000000e+00\n",
      "country[T.Serbia]        0.351970  1.426548  0.708592  3.346037e-01    1.000000e+00\n",
      "country[T.Thailand]      0.009418  1.096821  0.101635  5.958944e-02    1.000000e+00\n",
      "country[T.UK]            0.333178  1.111378  0.608512  1.060193e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]      0.693909  1.289775  0.946037  7.257429e-01    1.000000e+00\n",
      "rec_period[T.2005-2010]  0.736650  2.084483  1.239167  4.190154e-01    1.000000e+00\n",
      "rec_period[T.2010-2015]  1.070703  3.239752  1.862474  2.767730e-02    1.000000e+00\n",
      "rec_period[T.2015+]      0.548592  2.852715  1.250990  5.944246e-01    1.000000e+00\n",
      "PFOA_q                   0.857237  1.229180  1.026498  7.760546e-01    1.000000e+00\n",
      "age_group                1.991114  2.683644  2.311588  3.633904e-28    1.013859e-25\n",
      "Running logistic regression with parameter PFOA_q, signature ID8\n",
      "Zero counts for signature ID8: 240\n",
      "All counts for signature ID8: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                   Results: Generalized linear model\n",
      "=======================================================================\n",
      "Model:                  GLM                AIC:              995.7783  \n",
      "Link Function:          Logit              BIC:              -5083.6679\n",
      "Dependent Variable:     ID8_bool           Log-Likelihood:   -480.89   \n",
      "Date:                   2024-02-08 17:23   LL-Null:          -523.47   \n",
      "No. Observations:       905                Deviance:         961.78    \n",
      "Df Model:               16                 Pearson chi2:     898.      \n",
      "Df Residuals:           888                Scale:            1.0000    \n",
      "Method:                 IRLS                                           \n",
      "-----------------------------------------------------------------------\n",
      "                         Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "-----------------------------------------------------------------------\n",
      "Intercept                0.3185   0.3821  0.8337 0.4044 -0.4303  1.0673\n",
      "sex[T.Male]              0.3952   0.1656  2.3867 0.0170  0.0707  0.7197\n",
      "country[T.Brazil]       -1.3448   0.4940 -2.7220 0.0065 -2.3130 -0.3765\n",
      "country[T.Canada]       -0.6577   0.4728 -1.3910 0.1642 -1.5845  0.2690\n",
      "country[T.Lithuania]    -0.0096   0.8748 -0.0110 0.9912 -1.7241  1.7049\n",
      "country[T.Poland]       -0.2365   0.6670 -0.3546 0.7229 -1.5438  1.0707\n",
      "country[T.Romania]      -0.8275   0.3597 -2.3005 0.0214 -1.5325 -0.1225\n",
      "country[T.Russia]       -0.3112   0.2679 -1.1616 0.2454 -0.8363  0.2139\n",
      "country[T.Serbia]       -0.8763   0.3633 -2.4122 0.0159 -1.5884 -0.1643\n",
      "country[T.Thailand]     -1.7477   1.0168 -1.7187 0.0857 -3.7406  0.2453\n",
      "country[T.UK]           -0.3794   0.3302 -1.1491 0.2505 -1.0266  0.2678\n",
      "tobacco_ever[T.Yes]     -0.0144   0.1662 -0.0866 0.9310 -0.3402  0.3114\n",
      "rec_period[T.2005-2010]  0.0149   0.2886  0.0515 0.9589 -0.5507  0.5805\n",
      "rec_period[T.2010-2015]  0.0493   0.2978  0.1657 0.8684 -0.5343  0.6330\n",
      "rec_period[T.2015+]      0.3574   0.4595  0.7778 0.4367 -0.5432  1.2580\n",
      "PFOA_q                  -0.0995   0.0994 -1.0008 0.3169 -0.2943  0.0954\n",
      "age_group                0.5530   0.0748  7.3896 0.0000  0.4063  0.6997\n",
      "=======================================================================\n",
      "\n",
      "                             2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept                0.650321  2.907642  1.375101  4.044398e-01    1.000000e+00\n",
      "sex[T.Male]              1.073221  2.053860  1.484671  1.699857e-02    1.000000e+00\n",
      "country[T.Brazil]        0.098959  0.686280  0.260603  6.488868e-03    1.000000e+00\n",
      "country[T.Canada]        0.205052  1.308672  0.518021  1.642146e-01    1.000000e+00\n",
      "country[T.Lithuania]     0.178326  5.500674  0.990409  9.912103e-01    1.000000e+00\n",
      "country[T.Poland]        0.213577  2.917393  0.789359  7.228570e-01    1.000000e+00\n",
      "country[T.Romania]       0.216002  0.884705  0.437148  2.141793e-02    1.000000e+00\n",
      "country[T.Russia]        0.433298  1.238481  0.732552  2.453838e-01    1.000000e+00\n",
      "country[T.Serbia]        0.204262  0.848482  0.416308  1.585455e-02    1.000000e+00\n",
      "country[T.Thailand]      0.023740  1.278005  0.174182  8.566458e-02    1.000000e+00\n",
      "country[T.UK]            0.358217  1.307034  0.684253  2.505273e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]      0.711608  1.365368  0.985701  9.309631e-01    1.000000e+00\n",
      "rec_period[T.2005-2010]  0.576522  1.786860  1.014970  9.589353e-01    1.000000e+00\n",
      "rec_period[T.2010-2015]  0.586069  1.883240  1.050575  8.684084e-01    1.000000e+00\n",
      "rec_period[T.2015+]      0.580870  3.518514  1.429616  4.366938e-01    1.000000e+00\n",
      "PFOA_q                   0.745020  1.100050  0.905295  3.169244e-01    1.000000e+00\n",
      "age_group                1.501307  2.013127  1.738483  1.472435e-13    4.108093e-11\n",
      "Running logistic regression with parameter PFOA_q, signature ID9\n",
      "** Warning: Covariate country, sig ID9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "** Warning: Covariate rec_period, sig ID9, perfect or near-perfect separation for category 1999-2005. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter PFOA_q, sig ID9, perfect or near-perfect separation for category 2. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID9: 901\n",
      "All counts for signature ID9: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%         97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]              0.192666     11.288503  1.166018  0.523023        1.000000\n",
      "country[T.Brazil]        0.000103   1257.291241  0.483601  0.473684        1.000000\n",
      "country[T.Canada]        0.000783   1375.822231  1.729936  0.570188        1.000000\n",
      "country[T.Lithuania]     0.000624   4488.680471  2.549329  0.257149        1.000000\n",
      "country[T.Poland]        0.031138   1138.258875  5.957004  0.226253        1.000000\n",
      "country[T.Romania]       0.007017     26.355846  1.145722  0.471724        1.000000\n",
      "country[T.Russia]        0.002106      6.920416  0.354162  0.402307        1.000000\n",
      "country[T.Serbia]        0.138182    576.117369  3.677603  0.361625        1.000000\n",
      "country[T.Thailand]      0.001894  13975.014671  7.892246  0.293592        1.000000\n",
      "country[T.UK]            0.000190      8.519942  0.526180  0.524199        1.000000\n",
      "tobacco_ever[T.Yes]      0.280785     18.227184  1.765099  0.470000        1.000000\n",
      "rec_period[T.2005-2010]  0.295448    548.480884  3.904344  0.242088        1.000000\n",
      "rec_period[T.2010-2015]  0.008107    272.518368  1.344830  0.590883        1.000000\n",
      "rec_period[T.2015+]      0.001798   9254.263469  2.441063  0.694823        1.000000\n",
      "PFOA_q                   0.368281      3.677908  1.079703  0.718500        1.000000\n",
      "age_group                0.496039      2.802135  1.125960  0.583701        1.000000\n",
      "Intercept                0.000007      0.144302  0.002750  0.000898        0.250544\n",
      "Running logistic regression with parameter PFOA_q, signature ID11\n",
      "** Warning: Covariate country, sig ID11, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter PFOA_q, sig ID11, perfect or near-perfect separation for category 0. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID11: 897\n",
      "All counts for signature ID11: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "sex[T.Male]              0.127691   2.226235  0.567440  0.267521        1.000000\n",
      "country[T.Brazil]        0.000619   5.349209  0.129283  0.242302        1.000000\n",
      "country[T.Canada]        0.031158  14.487765  0.752804  0.581321        1.000000\n",
      "country[T.Lithuania]     0.002588  19.240079  0.515797  0.354797        1.000000\n",
      "country[T.Poland]        0.027434  92.425861  4.224934  0.259656        1.000000\n",
      "country[T.Romania]       0.003572   7.394456  0.522155  0.354956        1.000000\n",
      "country[T.Russia]        0.132430   6.766674  0.939319  0.686192        1.000000\n",
      "country[T.Serbia]        0.002591   7.008739  0.396133  0.422729        1.000000\n",
      "country[T.Thailand]      0.004335  37.630582  0.900977  0.542343        1.000000\n",
      "country[T.UK]            0.032589   6.894114  0.643108  0.538123        1.000000\n",
      "tobacco_ever[T.Yes]      0.025045   1.275971  0.246453  0.088917        1.000000\n",
      "rec_period[T.2005-2010]  0.131082  13.832461  1.151001  0.526193        1.000000\n",
      "rec_period[T.2010-2015]  0.155702  18.147041  1.425974  0.530323        1.000000\n",
      "rec_period[T.2015+]      0.135796  89.586504  3.409979  0.374698        1.000000\n",
      "PFOA_q                   0.500721   2.477430  1.090471  0.625446        1.000000\n",
      "age_group                0.669460   2.333165  1.216686  0.465617        1.000000\n",
      "Intercept                0.000602   0.271768  0.017593  0.001910        0.532915\n",
      "Running logistic regression with parameter PFOA_q, signature ID12\n",
      "** Warning: Covariate country, sig ID12, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Covariate rec_period, sig ID12, perfect or near-perfect separation for category 1999-2005. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter PFOA_q, sig ID12, perfect or near-perfect separation for category 2. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID12: 898\n",
      "All counts for signature ID12: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%        97.5%         OR   p-value  p-value (corr)\n",
      "sex[T.Male]              0.130102     2.413364   0.583499  0.239320             1.0\n",
      "country[T.Brazil]        0.062395   538.831925   2.228560  0.354188             1.0\n",
      "country[T.Canada]        0.003579   296.978290   0.980570  0.656171             1.0\n",
      "country[T.Lithuania]     0.016083  1337.239526   4.446854  0.180023             1.0\n",
      "country[T.Poland]        0.047967  2787.791867  10.216065  0.141546             1.0\n",
      "country[T.Romania]       0.011223   479.718231   2.287359  0.304547             1.0\n",
      "country[T.Russia]        0.183055   466.653532   2.875616  0.281094             1.0\n",
      "country[T.Serbia]        0.005971   325.582005   1.334690  0.387584             1.0\n",
      "country[T.Thailand]      0.050211  4850.523403  15.097060  0.167538             1.0\n",
      "country[T.UK]            0.183536  1093.398605   5.680626  0.214689             1.0\n",
      "tobacco_ever[T.Yes]      0.122430     2.949395   0.682938  0.378366             1.0\n",
      "rec_period[T.2005-2010]  0.036081   154.185009   0.947796  0.539952             1.0\n",
      "rec_period[T.2010-2015]  0.035227   130.995323   0.780890  0.409649             1.0\n",
      "rec_period[T.2015+]      0.041163   388.779298   1.893319  0.445805             1.0\n",
      "PFOA_q                   0.218513     1.980999   0.688891  0.369084             1.0\n",
      "age_group                0.207243     0.922728   0.468592  0.021735             1.0\n",
      "Intercept                0.000135     1.365968   0.044201  0.064405             1.0\n",
      "Running logistic regression with parameter PFOA_q, signature ID83C\n",
      "** Warning: Covariate country, sig ID83C, perfect or near-perfect separation for category Poland. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter PFOA_q, sig ID83C, perfect or near-perfect separation for category 3. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID83C: 889\n",
      "All counts for signature ID83C: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                              2.5%         97.5%          OR       p-value  p-value (corr)\n",
      "sex[T.Male]               0.486016      5.424796    1.536794  3.190191e-01        1.000000\n",
      "country[T.Brazil]         0.000469   6100.720474    1.930515  3.081291e-01        1.000000\n",
      "country[T.Canada]         0.003994  12598.364507    8.151034  4.056565e-01        1.000000\n",
      "country[T.Lithuania]      0.002642  29475.092035   10.189594  9.488978e-02        1.000000\n",
      "country[T.Poland]         0.201144   9522.885439   43.132409  8.612941e-02        1.000000\n",
      "country[T.Romania]       14.010818  22637.036193  158.027579  2.344057e-07        0.000065\n",
      "country[T.Russia]         0.009543    378.038258    1.905553  6.286674e-01        1.000000\n",
      "country[T.Serbia]         3.780456  12207.853474   72.398208  2.553982e-03        0.712561\n",
      "country[T.Thailand]       0.006856  82081.571529   27.308515  1.268290e-01        1.000000\n",
      "country[T.UK]             0.001053    734.260474    3.365015  5.186844e-01        1.000000\n",
      "tobacco_ever[T.Yes]       0.224621      2.423229    0.754906  5.616114e-01        1.000000\n",
      "rec_period[T.2005-2010]   0.616974     19.905815    3.061628  1.467481e-01        1.000000\n",
      "rec_period[T.2010-2015]   0.124596      4.468674    0.683353  5.072716e-01        1.000000\n",
      "rec_period[T.2015+]       0.008404   3008.057743    2.309156  6.500145e-01        1.000000\n",
      "PFOA_q                    0.426564      2.023652    0.944958  7.153399e-01        1.000000\n",
      "age_group                 0.923943      2.653314    1.520932  9.292065e-02        1.000000\n",
      "Intercept                 0.000002      0.013380    0.000478  1.741913e-08        0.000005\n",
      "Running logistic regression with parameter PFOA_q, signature SBS_burden\n",
      "** Warning: Covariate country, sig SBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS_burden: 454\n",
      "All counts for signature SBS_burden: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                             2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]              1.400421   2.721313  1.946636  6.605911e-05    1.843049e-02\n",
      "country[T.Brazil]        0.372389   2.500245  0.962974  9.307675e-01    1.000000e+00\n",
      "country[T.Canada]        0.265531   1.565001  0.645415  3.319211e-01    1.000000e+00\n",
      "country[T.Lithuania]     0.489935   6.903272  1.815612  3.721716e-01    1.000000e+00\n",
      "country[T.Poland]        0.152245   2.232151  0.602084  4.508468e-01    1.000000e+00\n",
      "country[T.Romania]       2.617409  17.114691  6.323369  1.648403e-05    4.599045e-03\n",
      "country[T.Russia]        0.357033   0.987760  0.595565  4.450690e-02    1.000000e+00\n",
      "country[T.Serbia]        0.543504   2.335168  1.124588  7.472723e-01    1.000000e+00\n",
      "country[T.Thailand]      0.000651   0.985589  0.090100  4.833589e-02    1.000000e+00\n",
      "country[T.UK]            0.651418   2.296288  1.219650  5.346279e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]      0.813512   1.562061  1.126682  4.698397e-01    1.000000e+00\n",
      "rec_period[T.2005-2010]  0.324505   0.986940  0.567603  4.404335e-02    1.000000e+00\n",
      "rec_period[T.2010-2015]  0.400413   1.309555  0.725341  2.862012e-01    1.000000e+00\n",
      "rec_period[T.2015+]      0.100308   0.591917  0.245838  1.676056e-03    4.676196e-01\n",
      "PFOA_q                   0.647526   0.949804  0.785527  1.257724e-02    1.000000e+00\n",
      "age_group                2.446812   3.418918  2.879398  2.635135e-47    7.352026e-45\n",
      "Intercept                0.103308   0.473254  0.222288  9.227357e-05    2.574433e-02\n",
      "Running logistic regression with parameter PFOA_q, signature DBS_burden\n",
      "** Warning: Covariate country, sig DBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS_burden: 489\n",
      "All counts for signature DBS_burden: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                             2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "sex[T.Male]              1.138877  2.164982  1.567430  5.721525e-03    1.000000e+00\n",
      "country[T.Brazil]        0.320353  2.081402  0.815218  6.681113e-01    1.000000e+00\n",
      "country[T.Canada]        0.203120  1.175312  0.490556  1.100683e-01    1.000000e+00\n",
      "country[T.Lithuania]     0.528608  7.590633  1.924656  3.245811e-01    1.000000e+00\n",
      "country[T.Poland]        0.219273  2.672884  0.791925  7.049170e-01    1.000000e+00\n",
      "country[T.Romania]       1.757450  9.654117  3.957911  6.745506e-04    1.881996e-01\n",
      "country[T.Russia]        0.431971  1.178076  0.714807  1.876270e-01    1.000000e+00\n",
      "country[T.Serbia]        0.583123  2.335322  1.164862  6.646338e-01    1.000000e+00\n",
      "country[T.Thailand]      0.000416  0.636825  0.057836  1.624429e-02    1.000000e+00\n",
      "country[T.UK]            0.530984  2.168139  1.067163  8.545766e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]      0.806332  1.525231  1.108454  5.245589e-01    1.000000e+00\n",
      "rec_period[T.2005-2010]  0.740221  2.218612  1.279610  3.719376e-01    1.000000e+00\n",
      "rec_period[T.2010-2015]  0.801114  2.436155  1.394275  2.398620e-01    1.000000e+00\n",
      "rec_period[T.2015+]      0.412833  2.308738  0.979114  9.582905e-01    1.000000e+00\n",
      "PFOA_q                   0.856478  1.245301  1.032928  7.337664e-01    1.000000e+00\n",
      "age_group                1.766738  2.378901  2.044098  2.157336e-24    6.018968e-22\n",
      "Intercept                0.077418  0.339258  0.163554  8.917115e-07    2.487875e-04\n",
      "Running logistic regression with parameter PFOA_q, signature ID_burden\n",
      "Zero counts for signature ID_burden: 449\n",
      "All counts for signature ID_burden: 905\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever', 'rec_period']\n",
      "Number of tests =  279\n",
      "                   Results: Generalized linear model\n",
      "=======================================================================\n",
      "Model:                  GLM                AIC:              1050.0311 \n",
      "Link Function:          Logit              BIC:              -5029.4151\n",
      "Dependent Variable:     ID_burden_bool     Log-Likelihood:   -508.02   \n",
      "Date:                   2024-02-08 17:23   LL-Null:          -627.27   \n",
      "No. Observations:       905                Deviance:         1016.0    \n",
      "Df Model:               16                 Pearson chi2:     899.      \n",
      "Df Residuals:           888                Scale:            1.0000    \n",
      "Method:                 IRLS                                           \n",
      "-----------------------------------------------------------------------\n",
      "                         Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "-----------------------------------------------------------------------\n",
      "Intercept               -2.0140   0.3809 -5.2867 0.0000 -2.7606 -1.2673\n",
      "sex[T.Male]              0.6574   0.1648  3.9887 0.0001  0.3344  0.9805\n",
      "country[T.Brazil]       -0.7433   0.4735 -1.5698 0.1165 -1.6714  0.1847\n",
      "country[T.Canada]       -0.9940   0.4453 -2.2324 0.0256 -1.8667 -0.1213\n",
      "country[T.Lithuania]    -0.4962   0.6660 -0.7451 0.4562 -1.8016  0.8092\n",
      "country[T.Poland]       -0.5449   0.6984 -0.7802 0.4353 -1.9137  0.8239\n",
      "country[T.Romania]       0.5351   0.3796  1.4098 0.1586 -0.2088  1.2791\n",
      "country[T.Russia]       -0.6493   0.2567 -2.5300 0.0114 -1.1524 -0.1463\n",
      "country[T.Serbia]       -0.5856   0.3663 -1.5988 0.1099 -1.3035  0.1323\n",
      "country[T.Thailand]     -2.2505   1.2178 -1.8479 0.0646 -4.6374  0.1364\n",
      "country[T.UK]           -0.3627   0.3161 -1.1472 0.2513 -0.9823  0.2569\n",
      "tobacco_ever[T.Yes]     -0.1931   0.1618 -1.1932 0.2328 -0.5102  0.1240\n",
      "rec_period[T.2005-2010]  0.4005   0.2726  1.4693 0.1417 -0.1337  0.9347\n",
      "rec_period[T.2010-2015]  0.7434   0.2911  2.5540 0.0107  0.1729  1.3140\n",
      "rec_period[T.2015+]      0.1234   0.4318  0.2858 0.7751 -0.7229  0.9697\n",
      "PFOA_q                  -0.0591   0.0943 -0.6268 0.5308 -0.2439  0.1257\n",
      "age_group                0.9441   0.0801 11.7847 0.0000  0.7871  1.1011\n",
      "=======================================================================\n",
      "\n",
      "                             2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept                0.063253  0.281584  0.133457  1.245272e-07    3.474310e-05\n",
      "sex[T.Male]              1.397073  2.665743  1.929829  6.644943e-05    1.853939e-02\n",
      "country[T.Brazil]        0.187991  1.202912  0.475538  1.164618e-01    1.000000e+00\n",
      "country[T.Canada]        0.154627  0.885777  0.370088  2.559145e-02    1.000000e+00\n",
      "country[T.Lithuania]     0.165028  2.246023  0.608816  4.562310e-01    1.000000e+00\n",
      "country[T.Poland]        0.147530  2.279391  0.579895  4.352516e-01    1.000000e+00\n",
      "country[T.Romania]       0.811520  3.593466  1.707680  1.586063e-01    1.000000e+00\n",
      "country[T.Russia]        0.315881  0.863897  0.522388  1.140664e-02    1.000000e+00\n",
      "country[T.Serbia]        0.271584  1.141443  0.556774  1.098693e-01    1.000000e+00\n",
      "country[T.Thailand]      0.009683  1.146186  0.105349  6.461256e-02    1.000000e+00\n",
      "country[T.UK]            0.374452  1.292960  0.695810  2.512878e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]      0.600397  1.132072  0.824435  2.327744e-01    1.000000e+00\n",
      "rec_period[T.2005-2010]  0.874819  2.546524  1.492564  1.417475e-01    1.000000e+00\n",
      "rec_period[T.2010-2015]  1.188760  3.720914  2.103158  1.065019e-02    1.000000e+00\n",
      "rec_period[T.2015+]      0.485328  2.637237  1.131337  7.750505e-01    1.000000e+00\n",
      "PFOA_q                   0.783589  1.133933  0.942622  5.308180e-01    1.000000e+00\n",
      "age_group                2.196994  3.007560  2.570523  4.683157e-32    1.306601e-29\n",
      "Using below/above median model for signature SBS1, its frequency is 0.76\n",
      "Using below/above median model for signature SBS1536A, its frequency is 0.86\n",
      "Using below/above median model for signature SBS1536B, its frequency is 0.89\n",
      "Using below/above median model for signature ID1, its frequency is 0.85\n",
      "Using below/above median model for signature ID5, its frequency is 0.93\n",
      "Running logistic regression with parameter fam_rcc, signature SBS1\n",
      "Zero counts for signature SBS1: 406\n",
      "All counts for signature SBS1: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1064.7737 \n",
      "Link Function:         Logit             BIC:             -4301.7459\n",
      "Dependent Variable:    SBS1_bool         Log-Likelihood:  -518.39   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -562.14   \n",
      "No. Observations:      811               Deviance:        1036.8    \n",
      "Df Model:              13                Pearson chi2:    810.      \n",
      "Df Residuals:          797               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.2765   0.2616 -4.8796 0.0000 -1.7892 -0.7638\n",
      "fam_rcc[T.Yes]        0.0595   0.3298  0.1805 0.8568 -0.5869  0.7059\n",
      "sex[T.Male]           0.0322   0.1583  0.2034 0.8388 -0.2780  0.3424\n",
      "country[T.Brazil]     0.2942   0.2644  1.1130 0.2657 -0.2239  0.8124\n",
      "country[T.Canada]     0.9511   0.3553  2.6770 0.0074  0.2548  1.6475\n",
      "country[T.Japan]      0.4335   0.3844  1.1276 0.2595 -0.3200  1.1869\n",
      "country[T.Lithuania]  0.9577   0.5721  1.6739 0.0942 -0.1637  2.0791\n",
      "country[T.Romania]   -1.0443   0.3611 -2.8920 0.0038 -1.7521 -0.3366\n",
      "country[T.Russia]     0.7539   0.2216  3.4021 0.0007  0.3196  1.1883\n",
      "country[T.Serbia]    -0.2640   0.3085 -0.8557 0.3921 -0.8686  0.3407\n",
      "country[T.Thailand]   0.3848   0.9407  0.4091 0.6825 -1.4590  2.2287\n",
      "country[T.UK]         0.6217   0.2574  2.4147 0.0157  0.1171  1.1262\n",
      "tobacco_ever[T.Yes]   0.0662   0.1578  0.4196 0.6748 -0.2431  0.3755\n",
      "age_group             0.4603   0.0697  6.6087 0.0000  0.3238  0.5968\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.167096  0.465914  0.279020  1.063147e-06    2.966181e-04\n",
      "fam_rcc[T.Yes]        0.556074  2.025700  1.061338  8.567534e-01    1.000000e+00\n",
      "sex[T.Male]           0.757302  1.408299  1.032719  8.388008e-01    1.000000e+00\n",
      "country[T.Brazil]     0.799403  2.253247  1.342107  2.656905e-01    1.000000e+00\n",
      "country[T.Canada]     1.290165  5.194122  2.588682  7.428080e-03    1.000000e+00\n",
      "country[T.Japan]      0.726171  3.276860  1.542583  2.594903e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.849023  7.997176  2.605722  9.415017e-02    1.000000e+00\n",
      "country[T.Romania]    0.173410  0.714211  0.351925  3.827648e-03    1.000000e+00\n",
      "country[T.Russia]     1.376564  3.281394  2.125335  6.686398e-04    1.865505e-01\n",
      "country[T.Serbia]     0.419521  1.405865  0.767978  3.921430e-01    1.000000e+00\n",
      "country[T.Thailand]   0.232474  9.287375  1.469377  6.824804e-01    1.000000e+00\n",
      "country[T.UK]         1.124202  3.084056  1.862016  1.574763e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.784201  1.455775  1.068466  6.747567e-01    1.000000e+00\n",
      "age_group             1.382371  1.816364  1.584578  3.877296e-11    1.081766e-08\n",
      "Running logistic regression with parameter fam_rcc, signature SBS2\n",
      "** Warning: Covariate country, sig SBS2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter fam_rcc, sig SBS2, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS2: 806\n",
      "All counts for signature SBS2: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%       97.5%         OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.011939   15.734065   1.612248  5.353236e-01        1.000000\n",
      "sex[T.Male]           0.335114   19.251836   1.922876  3.609381e-01        1.000000\n",
      "country[T.Brazil]     0.005117   14.733300   0.756105  5.025859e-01        1.000000\n",
      "country[T.Canada]     0.008935   25.218603   1.315638  8.258217e-01        1.000000\n",
      "country[T.Japan]      0.008679   26.363447   1.305134  4.424912e-01        1.000000\n",
      "country[T.Lithuania]  0.026829   85.545310   4.056269  2.943455e-01        1.000000\n",
      "country[T.Romania]    0.007338   21.851372   1.093508  4.808517e-01        1.000000\n",
      "country[T.Russia]     0.235002   23.859012   1.981535  3.034383e-01        1.000000\n",
      "country[T.Serbia]     0.007330   21.929856   1.096825  4.832757e-01        1.000000\n",
      "country[T.Thailand]   0.079664  328.240747  12.888509  1.767137e-01        1.000000\n",
      "country[T.UK]         0.353030   32.238854   2.791163  2.525025e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.236296    8.369344   1.285688  5.529842e-01        1.000000\n",
      "age_group             0.572507    2.990474   1.262956  4.424060e-01        1.000000\n",
      "Intercept             0.000077    0.047663   0.003071  2.947363e-07        0.000082\n",
      "Running logistic regression with parameter fam_rcc, signature SBS4\n",
      "Zero counts for signature SBS4: 349\n",
      "All counts for signature SBS4: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1055.5864 \n",
      "Link Function:         Logit             BIC:             -4310.9333\n",
      "Dependent Variable:    SBS4_bool         Log-Likelihood:  -513.79   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -554.24   \n",
      "No. Observations:      811               Deviance:        1027.6    \n",
      "Df Model:              13                Pearson chi2:    804.      \n",
      "Df Residuals:          797               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.5082   0.2567 -1.9796 0.0477 -1.0114 -0.0050\n",
      "fam_rcc[T.Yes]        0.0976   0.3397  0.2873 0.7739 -0.5683  0.7634\n",
      "sex[T.Male]          -0.2121   0.1590 -1.3343 0.1821 -0.5236  0.0995\n",
      "country[T.Brazil]    -0.5077   0.2688 -1.8891 0.0589 -1.0345  0.0191\n",
      "country[T.Canada]    -0.2638   0.3480 -0.7581 0.4484 -0.9458  0.4182\n",
      "country[T.Japan]      0.9154   0.4834  1.8935 0.0583 -0.0321  1.8630\n",
      "country[T.Lithuania] -0.0952   0.5531 -0.1722 0.8633 -1.1793  0.9888\n",
      "country[T.Romania]   -0.8642   0.3244 -2.6641 0.0077 -1.5000 -0.2284\n",
      "country[T.Russia]    -0.1595   0.2214 -0.7203 0.4713 -0.5934  0.2744\n",
      "country[T.Serbia]    -0.4347   0.2987 -1.4554 0.1456 -1.0200  0.1507\n",
      "country[T.Thailand]  -0.2504   0.9379 -0.2670 0.7895 -2.0885  1.5878\n",
      "country[T.UK]         0.0154   0.2665  0.0579 0.9538 -0.5070  0.5378\n",
      "tobacco_ever[T.Yes]   0.7554   0.1593  4.7423 0.0000  0.4432  1.0676\n",
      "age_group             0.3790   0.0688  5.5113 0.0000  0.2442  0.5138\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.363711  0.994974  0.601567  4.774978e-02         1.00000\n",
      "fam_rcc[T.Yes]        0.566514  2.145620  1.102508  7.739145e-01         1.00000\n",
      "sex[T.Male]           0.592355  1.104564  0.808885  1.820997e-01         1.00000\n",
      "country[T.Brazil]     0.355394  1.019239  0.601857  5.888345e-02         1.00000\n",
      "country[T.Canada]     0.388380  1.519231  0.768140  4.483985e-01         1.00000\n",
      "country[T.Japan]      0.968385  6.442759  2.497814  5.828955e-02         1.00000\n",
      "country[T.Lithuania]  0.307489  2.688125  0.909158  8.632945e-01         1.00000\n",
      "country[T.Romania]    0.223120  0.795785  0.421373  7.719139e-03         1.00000\n",
      "country[T.Russia]     0.552464  1.315780  0.852597  4.713180e-01         1.00000\n",
      "country[T.Serbia]     0.360585  1.162641  0.647480  1.455596e-01         1.00000\n",
      "country[T.Thailand]   0.123868  4.892973  0.778513  7.895009e-01         1.00000\n",
      "country[T.UK]         0.602317  1.712283  1.015547  9.538419e-01         1.00000\n",
      "tobacco_ever[T.Yes]   1.557684  2.908413  2.128471  2.113048e-06         0.00059\n",
      "age_group             1.276606  1.671552  1.460792  3.561081e-08         0.00001\n",
      "Running logistic regression with parameter fam_rcc, signature SBS5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Zero counts for signature SBS5: 748\n",
      "All counts for signature SBS5: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             426.2074  \n",
      "Link Function:         Logit             BIC:             -4940.3123\n",
      "Dependent Variable:    SBS5_bool         Log-Likelihood:  -199.10   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -221.46   \n",
      "No. Observations:      811               Deviance:        398.21    \n",
      "Df Model:              13                Pearson chi2:    796.      \n",
      "Df Residuals:          797               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -3.3145   0.5051 -6.5622 0.0000 -4.3044 -2.3245\n",
      "fam_rcc[T.Yes]       -1.5542   1.0353 -1.5012 0.1333 -3.5834  0.4749\n",
      "sex[T.Male]          -0.3719   0.2912 -1.2772 0.2015 -0.9426  0.1988\n",
      "country[T.Brazil]     0.3146   0.4067  0.7737 0.4391 -0.4824  1.1117\n",
      "country[T.Canada]    -0.4974   0.6566 -0.7575 0.4488 -1.7843  0.7896\n",
      "country[T.Japan]      0.3821   0.5271  0.7248 0.4686 -0.6511  1.4152\n",
      "country[T.Lithuania]  0.5044   0.7041  0.7164 0.4738 -0.8757  1.8845\n",
      "country[T.Romania]   -1.2053   0.7665 -1.5725 0.1158 -2.7076  0.2969\n",
      "country[T.Russia]    -1.0576   0.4973 -2.1266 0.0335 -2.0324 -0.0829\n",
      "country[T.Serbia]    -0.4473   0.5830 -0.7673 0.4429 -1.5899  0.6953\n",
      "country[T.Thailand]   0.3684   1.1707  0.3147 0.7530 -1.9260  2.6629\n",
      "country[T.UK]        -0.6045   0.4732 -1.2774 0.2015 -1.5319  0.3230\n",
      "tobacco_ever[T.Yes]   0.2762   0.2953  0.9353 0.3496 -0.3026  0.8550\n",
      "age_group             0.5444   0.1338  4.0684 0.0000  0.2821  0.8066\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.013509   0.097830  0.036353  5.303016e-11    1.479541e-08\n",
      "fam_rcc[T.Yes]        0.027782   1.607929  0.211356  1.333006e-01    1.000000e+00\n",
      "sex[T.Male]           0.389630   1.219933  0.689437  2.015289e-01    1.000000e+00\n",
      "country[T.Brazil]     0.617279   3.039548  1.369762  4.391206e-01    1.000000e+00\n",
      "country[T.Canada]     0.167919   2.202453  0.608140  4.487760e-01    1.000000e+00\n",
      "country[T.Japan]      0.521482   4.117381  1.465312  4.685664e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.416588   6.583314  1.656059  4.737516e-01    1.000000e+00\n",
      "country[T.Romania]    0.066699   1.345740  0.299600  1.158240e-01    1.000000e+00\n",
      "country[T.Russia]     0.131019   0.920473  0.347274  3.345460e-02    1.000000e+00\n",
      "country[T.Serbia]     0.203950   2.004241  0.639347  4.428975e-01    1.000000e+00\n",
      "country[T.Thailand]   0.145728  14.337412  1.445464  7.529732e-01    1.000000e+00\n",
      "country[T.UK]         0.216132   1.381213  0.546374  2.014514e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.738890   2.351482  1.318138  3.496286e-01    1.000000e+00\n",
      "age_group             1.325930   2.240281  1.723501  4.732947e-05    1.320492e-02\n",
      "Running logistic regression with parameter fam_rcc, signature SBS12\n",
      "** Warning: Covariate country, sig SBS12, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter fam_rcc, sig SBS12, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS12: 770\n",
      "All counts for signature SBS12: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                           2.5%       97.5%          OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]         0.000741    1.679234    0.113280  1.264079e-01    1.000000e+00\n",
      "sex[T.Male]            0.427079    2.749031    1.072920  8.263898e-01    1.000000e+00\n",
      "country[T.Brazil]      0.235667    7.650629    1.447603  5.929208e-01    1.000000e+00\n",
      "country[T.Canada]      1.403440   28.532064    6.067930  1.681636e-02    1.000000e+00\n",
      "country[T.Japan]      40.009666  610.953045  135.455815  1.260525e-21    3.516866e-19\n",
      "country[T.Lithuania]   0.009428   14.435343    1.294373  6.686382e-01    1.000000e+00\n",
      "country[T.Romania]     0.114272    7.511973    1.200625  4.686056e-01    1.000000e+00\n",
      "country[T.Russia]      0.110556    3.673192    0.686206  3.719732e-01    1.000000e+00\n",
      "country[T.Serbia]      0.116003    7.704875    1.222795  7.080038e-01    1.000000e+00\n",
      "country[T.Thailand]    0.021487   40.904016    3.085993  4.733105e-01    1.000000e+00\n",
      "country[T.UK]          0.204136    6.610576    1.252443  7.020137e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.238456    1.584819    0.630132  3.214392e-01    1.000000e+00\n",
      "age_group              0.949710    2.011007    1.367808  9.073095e-02    1.000000e+00\n",
      "Intercept              0.002245    0.054777    0.012660  1.183444e-11    3.301809e-09\n",
      "Running logistic regression with parameter fam_rcc, signature SBS13\n",
      "** Warning: Covariate country, sig SBS13, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS13: 675\n",
      "All counts for signature SBS13: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.313148  1.845332  0.819611  6.400544e-01    1.000000e+00\n",
      "sex[T.Male]           1.095469  2.518203  1.649346  1.608320e-02    1.000000e+00\n",
      "country[T.Brazil]     0.463165  1.915602  0.961829  8.686364e-01    1.000000e+00\n",
      "country[T.Canada]     0.477353  2.642577  1.171760  7.153722e-01    1.000000e+00\n",
      "country[T.Japan]      0.124524  1.407847  0.484794  1.933583e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.373452  4.917816  1.531040  5.092218e-01    1.000000e+00\n",
      "country[T.Romania]    0.050504  0.812497  0.254671  1.777305e-02    1.000000e+00\n",
      "country[T.Russia]     1.138713  3.326491  1.932158  1.404818e-02    1.000000e+00\n",
      "country[T.Serbia]     0.340854  1.825164  0.825161  6.287799e-01    1.000000e+00\n",
      "country[T.Thailand]   0.004536  5.783070  0.606123  7.037720e-01    1.000000e+00\n",
      "country[T.UK]         0.795677  2.725515  1.478162  2.122727e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.869462  1.920957  1.289980  2.046786e-01    1.000000e+00\n",
      "age_group             0.914879  1.292517  1.086305  3.428947e-01    1.000000e+00\n",
      "Intercept             0.048659  0.190697  0.098134  4.110617e-13    1.146862e-10\n",
      "Running logistic regression with parameter fam_rcc, signature SBS18\n",
      "** Warning: Covariate country, sig SBS18, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS18: 755\n",
      "All counts for signature SBS18: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.334394   3.578586  1.275716  6.620201e-01    1.000000e+00\n",
      "sex[T.Male]           1.239450   4.504110  2.293461  7.303900e-03    1.000000e+00\n",
      "country[T.Brazil]     0.500726   3.589144  1.377277  5.015050e-01    1.000000e+00\n",
      "country[T.Canada]     0.490575   5.141020  1.719730  3.706876e-01    1.000000e+00\n",
      "country[T.Japan]      0.215267   4.235463  1.156611  7.383541e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.585050  13.190248  3.290935  1.515914e-01    1.000000e+00\n",
      "country[T.Romania]    0.408437   4.254034  1.428161  5.133119e-01    1.000000e+00\n",
      "country[T.Russia]     0.481081   2.620741  1.114000  6.929603e-01    1.000000e+00\n",
      "country[T.Serbia]     0.429400   3.841501  1.357545  5.394674e-01    1.000000e+00\n",
      "country[T.Thailand]   0.015910  22.634143  2.166522  6.003575e-01    1.000000e+00\n",
      "country[T.UK]         0.551081   3.658552  1.444528  4.256124e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.457577   1.410966  0.804805  4.332298e-01    1.000000e+00\n",
      "age_group             0.677620   1.102832  0.864553  2.349042e-01    1.000000e+00\n",
      "Intercept             0.019012   0.134774  0.053041  1.248791e-11    3.484127e-09\n",
      "Running logistic regression with parameter fam_rcc, signature SBS21\n",
      "** Warning: Covariate country, sig SBS21, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter fam_rcc, sig SBS21, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS21: 807\n",
      "All counts for signature SBS21: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%        97.5%         OR   p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.017872    25.360645   2.445176  0.478650        1.000000\n",
      "sex[T.Male]           0.102934     4.134448   0.655112  0.423475        1.000000\n",
      "country[T.Brazil]     0.008692   312.738282   1.649143  0.477428        1.000000\n",
      "country[T.Canada]     0.018392   639.123469   3.432375  0.506979        1.000000\n",
      "country[T.Japan]      0.030238  1112.656915   5.770392  0.286387        1.000000\n",
      "country[T.Lithuania]  0.059394  2298.131625  11.609551  0.202886        1.000000\n",
      "country[T.Romania]    0.499837  1483.141028   9.922671  0.084637        1.000000\n",
      "country[T.Russia]     0.105947   333.050394   2.173480  0.454034        1.000000\n",
      "country[T.Serbia]     0.011057   417.374477   2.137526  0.383834        1.000000\n",
      "country[T.Thailand]   0.168643  8031.668645  36.670957  0.094119        1.000000\n",
      "country[T.UK]         0.697582  1300.710147   9.171636  0.070447        1.000000\n",
      "tobacco_ever[T.Yes]   0.154274     6.905537   1.014931  0.522470        1.000000\n",
      "age_group             0.262949     1.512322   0.647174  0.246983        1.000000\n",
      "Intercept             0.000053     0.141774   0.009190  0.000064        0.017747\n",
      "Running logistic regression with parameter fam_rcc, signature SBS22\n",
      "** Warning: Covariate country, sig SBS22, perfect or near-perfect separation for category UK. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS22: 747\n",
      "All counts for signature SBS22: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                            2.5%        97.5%          OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]          0.267364    10.423406    1.921019  4.302814e-01    1.000000e+00\n",
      "sex[T.Male]             0.263613     1.273394    0.586253  1.653053e-01    1.000000e+00\n",
      "country[T.Brazil]       0.867986    57.388544    5.450114  6.200544e-02    1.000000e+00\n",
      "country[T.Canada]       0.387867    62.930945    4.938028  1.916750e-01    1.000000e+00\n",
      "country[T.Japan]        0.011563    33.642960    1.717341  5.407151e-01    1.000000e+00\n",
      "country[T.Lithuania]    0.020698    63.388063    3.120481  3.940523e-01    1.000000e+00\n",
      "country[T.Romania]    100.304419  4322.350075  447.142466  1.268205e-32    3.538292e-30\n",
      "country[T.Russia]       0.002951     8.382447    0.435263  4.437096e-01    1.000000e+00\n",
      "country[T.Serbia]      13.107492   545.565594   57.457169  8.205779e-11    2.289412e-08\n",
      "country[T.Thailand]    13.405244  1910.810302  125.870059  2.739602e-05    7.643490e-03\n",
      "country[T.UK]           0.004365    12.394843    0.643737  5.650875e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]     0.394607     1.995993    0.886122  6.511637e-01    1.000000e+00\n",
      "age_group               1.356250     2.763753    1.902910  1.176902e-04    3.283556e-02\n",
      "Intercept               0.000191     0.012693    0.002177  9.980498e-20    2.784559e-17\n",
      "Running logistic regression with parameter fam_rcc, signature SBS44\n",
      "** Warning: Covariate country, sig SBS44, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter fam_rcc, sig SBS44, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS44: 806\n",
      "All counts for signature SBS44: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%        97.5%         OR   p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.013943    18.010893   1.870504  0.542083        1.000000\n",
      "sex[T.Male]           0.173760     5.148146   0.884743  0.512139        1.000000\n",
      "country[T.Brazil]     0.284803   824.805450   5.559517  0.195879        1.000000\n",
      "country[T.Canada]     0.018740   643.924686   3.474019  0.520578        1.000000\n",
      "country[T.Japan]      0.026944   962.952840   5.084140  0.315694        1.000000\n",
      "country[T.Lithuania]  0.060986  2268.296400  11.721076  0.212951        1.000000\n",
      "country[T.Romania]    0.547976  1572.455332  10.634569  0.080442        1.000000\n",
      "country[T.Russia]     0.123564   372.223892   2.471136  0.417445        1.000000\n",
      "country[T.Serbia]     0.011905   434.689352   2.269708  0.410592        1.000000\n",
      "country[T.Thailand]   0.200730  8762.469716  41.776041  0.096328        1.000000\n",
      "country[T.UK]         0.670847  1225.265747   8.697695  0.077744        1.000000\n",
      "tobacco_ever[T.Yes]   0.279605     8.999251   1.471603  0.451160        1.000000\n",
      "age_group             0.325694     1.532575   0.717674  0.308021        1.000000\n",
      "Intercept             0.000031     0.079487   0.005221  0.000003        0.000767\n",
      "Running logistic regression with parameter fam_rcc, signature SBS1536A\n",
      "** Warning: Covariate country, sig SBS1536A, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536A: 406\n",
      "All counts for signature SBS1536A: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.550125  2.226490  1.100967  7.844315e-01    1.000000e+00\n",
      "sex[T.Male]           1.512828  2.942763  2.104429  8.702622e-06    2.428032e-03\n",
      "country[T.Brazil]     0.177962  0.545676  0.313444  3.699746e-05    1.032229e-02\n",
      "country[T.Canada]     0.213891  0.890081  0.436836  2.269375e-02    1.000000e+00\n",
      "country[T.Japan]      0.025310  0.157459  0.065873  1.266874e-10    3.534577e-08\n",
      "country[T.Lithuania]  0.402682  4.460920  1.238079  7.110762e-01    1.000000e+00\n",
      "country[T.Romania]    0.206640  0.801616  0.407973  9.339462e-03    1.000000e+00\n",
      "country[T.Russia]     0.239541  0.601609  0.380998  3.129266e-05    8.730653e-03\n",
      "country[T.Serbia]     0.288274  0.993815  0.536251  4.772238e-02    1.000000e+00\n",
      "country[T.Thailand]   0.000218  0.282627  0.029261  8.591702e-04    2.397085e-01\n",
      "country[T.UK]         0.307366  0.903201  0.527597  1.971075e-02    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.823722  1.581633  1.141210  4.251140e-01    1.000000e+00\n",
      "age_group             1.811348  2.464682  2.105891  4.095391e-25    1.142614e-22\n",
      "Intercept             0.166213  0.494700  0.288409  5.360095e-06    1.495466e-03\n",
      "Running logistic regression with parameter fam_rcc, signature SBS1536B\n",
      "Zero counts for signature SBS1536B: 406\n",
      "All counts for signature SBS1536B: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1069.1388 \n",
      "Link Function:         Logit             BIC:             -4297.3808\n",
      "Dependent Variable:    SBS1536B_bool     Log-Likelihood:  -520.57   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -562.14   \n",
      "No. Observations:      811               Deviance:        1041.1    \n",
      "Df Model:              13                Pearson chi2:    810.      \n",
      "Df Residuals:          797               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.3110   0.2630 -4.9846 0.0000 -1.8265 -0.7955\n",
      "fam_rcc[T.Yes]        0.2470   0.3328  0.7421 0.4580 -0.4053  0.8992\n",
      "sex[T.Male]          -0.0540   0.1578 -0.3419 0.7324 -0.3633  0.2554\n",
      "country[T.Brazil]    -0.6292   0.2773 -2.2695 0.0232 -1.1727 -0.0858\n",
      "country[T.Canada]    -0.1635   0.3452 -0.4736 0.6358 -0.8402  0.5131\n",
      "country[T.Japan]     -0.3207   0.3847 -0.8336 0.4045 -1.0746  0.4333\n",
      "country[T.Lithuania] -0.0801   0.5381 -0.1488 0.8817 -1.1348  0.9746\n",
      "country[T.Romania]    0.1278   0.3225  0.3963 0.6919 -0.5043  0.7598\n",
      "country[T.Russia]     0.5161   0.2223  2.3212 0.0203  0.0803  0.9519\n",
      "country[T.Serbia]     0.2804   0.3011  0.9313 0.3517 -0.3097  0.8706\n",
      "country[T.Thailand]  -0.6539   0.9439 -0.6928 0.4884 -2.5039  1.1960\n",
      "country[T.UK]         0.3817   0.2597  1.4697 0.1417 -0.1273  0.8907\n",
      "tobacco_ever[T.Yes]   0.3369   0.1582  2.1294 0.0332  0.0268  0.6470\n",
      "age_group             0.5328   0.0707  7.5342 0.0000  0.3942  0.6714\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.160981  0.451356  0.269554  6.209492e-07    1.732448e-04\n",
      "fam_rcc[T.Yes]        0.666764  2.457746  1.280132  4.580447e-01    1.000000e+00\n",
      "sex[T.Male]           0.695366  1.290973  0.947469  7.324461e-01    1.000000e+00\n",
      "country[T.Brazil]     0.309543  0.917769  0.533000  2.324095e-02    1.000000e+00\n",
      "country[T.Canada]     0.431633  1.670530  0.849150  6.357588e-01    1.000000e+00\n",
      "country[T.Japan]      0.341421  1.542365  0.725669  4.045306e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.321481  2.650212  0.923035  8.816913e-01    1.000000e+00\n",
      "country[T.Romania]    0.603958  2.137937  1.136320  6.918911e-01    1.000000e+00\n",
      "country[T.Russia]     1.083634  2.590530  1.675466  2.027503e-02    1.000000e+00\n",
      "country[T.Serbia]     0.733647  2.388350  1.323709  3.516743e-01    1.000000e+00\n",
      "country[T.Thailand]   0.081769  3.306932  0.520005  4.884298e-01    1.000000e+00\n",
      "country[T.UK]         0.880435  2.436895  1.464762  1.416536e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   1.027177  1.909805  1.400610  3.321743e-02    1.000000e+00\n",
      "age_group             1.483173  1.956939  1.703666  4.912520e-14    1.370593e-11\n",
      "Running logistic regression with parameter fam_rcc, signature SBS1536F\n",
      "Zero counts for signature SBS1536F: 712\n",
      "All counts for signature SBS1536F: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             608.2973  \n",
      "Link Function:         Logit             BIC:             -4758.2223\n",
      "Dependent Variable:    SBS1536F_bool     Log-Likelihood:  -290.15   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -300.91   \n",
      "No. Observations:      811               Deviance:        580.30    \n",
      "Df Model:              13                Pearson chi2:    810.      \n",
      "Df Residuals:          797               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -2.4249   0.4011 -6.0457 0.0000 -3.2110 -1.6388\n",
      "fam_rcc[T.Yes]        0.8321   0.4099  2.0299 0.0424  0.0287  1.6355\n",
      "sex[T.Male]           0.1337   0.2328  0.5745 0.5656 -0.3225  0.5899\n",
      "country[T.Brazil]     0.6000   0.4044  1.4836 0.1379 -0.1926  1.3926\n",
      "country[T.Canada]     1.3122   0.4405  2.9791 0.0029  0.4489  2.1755\n",
      "country[T.Japan]      0.6878   0.5569  1.2350 0.2168 -0.4038  1.7793\n",
      "country[T.Lithuania]  1.0432   0.7004  1.4894 0.1364 -0.3296  2.4160\n",
      "country[T.Romania]   -0.8274   0.7704 -1.0740 0.2828 -2.3373  0.6826\n",
      "country[T.Russia]     0.7074   0.3428  2.0636 0.0391  0.0355  1.3792\n",
      "country[T.Serbia]    -0.0729   0.5404 -0.1350 0.8926 -1.1320  0.9861\n",
      "country[T.Thailand]   1.0938   1.1590  0.9438 0.3453 -1.1777  3.3654\n",
      "country[T.UK]         0.5486   0.4030  1.3615 0.1733 -0.2411  1.3384\n",
      "tobacco_ever[T.Yes]  -0.2956   0.2339 -1.2640 0.2062 -0.7541  0.1628\n",
      "age_group            -0.0086   0.0989 -0.0865 0.9310 -0.2023  0.1852\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.040314   0.194218  0.088486  1.487821e-09    4.151022e-07\n",
      "fam_rcc[T.Yes]        1.029079   5.131892  2.298069  4.236755e-02    1.000000e+00\n",
      "sex[T.Male]           0.724343   1.803853  1.143070  5.656438e-01    1.000000e+00\n",
      "country[T.Brazil]     0.824789   4.025316  1.822097  1.379047e-01    1.000000e+00\n",
      "country[T.Canada]     1.566562   8.806223  3.714228  2.891100e-03    8.066169e-01\n",
      "country[T.Japan]      0.667807   5.925739  1.989283  2.168399e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.719223  11.201362  2.838359  1.363776e-01    1.000000e+00\n",
      "country[T.Romania]    0.096591   1.978934  0.437204  2.828409e-01    1.000000e+00\n",
      "country[T.Russia]     1.036165   3.971813  2.028658  3.905536e-02    1.000000e+00\n",
      "country[T.Serbia]     0.322390   2.680877  0.929671  8.926449e-01    1.000000e+00\n",
      "country[T.Thailand]   0.307983  28.943649  2.985660  3.452766e-01    1.000000e+00\n",
      "country[T.UK]         0.785728   3.813034  1.730898  1.733466e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.470448   1.176782  0.744053  2.062313e-01    1.000000e+00\n",
      "age_group             0.816834   1.203471  0.991482  9.310422e-01    1.000000e+00\n",
      "Running logistic regression with parameter fam_rcc, signature SBS1536I\n",
      "** Warning: Covariate country, sig SBS1536I, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS1536I: 722\n",
      "All counts for signature SBS1536I: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                            2.5%        97.5%          OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]          0.093372     4.573742    0.785783  5.372305e-01    1.000000e+00\n",
      "sex[T.Male]             0.469198     2.034271    0.975116  5.923950e-01    1.000000e+00\n",
      "country[T.Brazil]       0.507889    12.172759    2.485117  1.928839e-01    1.000000e+00\n",
      "country[T.Canada]       0.004736     6.996404    0.645192  7.488882e-01    1.000000e+00\n",
      "country[T.Japan]        0.158239    11.232435    1.712745  4.260829e-01    1.000000e+00\n",
      "country[T.Lithuania]    0.422517    32.722494    4.727775  1.345986e-01    1.000000e+00\n",
      "country[T.Romania]    100.279006  1606.064347  347.878964  1.921234e-35    5.360242e-33\n",
      "country[T.Russia]       0.062652     4.214506    0.663681  3.704986e-01    1.000000e+00\n",
      "country[T.Serbia]      33.033787   449.652222  105.553321  8.599307e-23    2.399207e-20\n",
      "country[T.Thailand]     8.582888   568.782808   63.892622  8.512778e-05    2.375065e-02\n",
      "country[T.UK]           0.001790     2.598512    0.243060  2.185108e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]     0.504421     2.234024    1.055570  5.855081e-01    1.000000e+00\n",
      "age_group               1.678192     3.397703    2.344105  9.170298e-08    2.558513e-05\n",
      "Intercept               0.000349     0.009690    0.002112  2.267296e-24    6.325755e-22\n",
      "Running logistic regression with parameter fam_rcc, signature DBS2\n",
      "Zero counts for signature DBS2: 479\n",
      "All counts for signature DBS2: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1069.2850 \n",
      "Link Function:         Logit             BIC:             -4297.2347\n",
      "Dependent Variable:    DBS2_bool         Log-Likelihood:  -520.64   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -548.75   \n",
      "No. Observations:      811               Deviance:        1041.3    \n",
      "Df Model:              13                Pearson chi2:    811.      \n",
      "Df Residuals:          797               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.3399   0.2613 -5.1281 0.0000 -1.8521 -0.8278\n",
      "fam_rcc[T.Yes]        0.3513   0.3253  1.0799 0.2802 -0.2863  0.9889\n",
      "sex[T.Male]           0.2328   0.1586  1.4683 0.1420 -0.0780  0.5436\n",
      "country[T.Brazil]    -0.1787   0.2682 -0.6664 0.5052 -0.7043  0.3469\n",
      "country[T.Canada]    -0.4485   0.3490 -1.2850 0.1988 -1.1326  0.2356\n",
      "country[T.Japan]     -0.9031   0.4043 -2.2335 0.0255 -1.6956 -0.1106\n",
      "country[T.Lithuania]  0.0134   0.5378  0.0250 0.9801 -1.0406  1.0674\n",
      "country[T.Romania]    0.2520   0.3185  0.7912 0.4288 -0.3722  0.8761\n",
      "country[T.Russia]    -0.2104   0.2227 -0.9446 0.3449 -0.6469  0.2261\n",
      "country[T.Serbia]     0.3927   0.2952  1.3303 0.1834 -0.1859  0.9714\n",
      "country[T.Thailand]  -0.0171   0.9425 -0.0181 0.9855 -1.8643  1.8301\n",
      "country[T.UK]        -0.1593   0.2552 -0.6244 0.5324 -0.6594  0.3408\n",
      "tobacco_ever[T.Yes]   0.7660   0.1578  4.8533 0.0000  0.4567  1.0754\n",
      "age_group             0.2651   0.0681  3.8951 0.0001  0.1317  0.3985\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.156911  0.437004  0.261860  2.927411e-07        0.000082\n",
      "fam_rcc[T.Yes]        0.751043  2.688236  1.420909  2.801892e-01        1.000000\n",
      "sex[T.Male]           0.924995  1.722219  1.262159  1.420298e-01        1.000000\n",
      "country[T.Brazil]     0.494461  1.414665  0.836359  5.051709e-01        1.000000\n",
      "country[T.Canada]     0.322200  1.265645  0.638585  1.987929e-01        1.000000\n",
      "country[T.Japan]      0.183488  0.895309  0.405313  2.551890e-02        1.000000\n",
      "country[T.Lithuania]  0.353260  2.907810  1.013515  9.800842e-01        1.000000\n",
      "country[T.Romania]    0.689216  2.401523  1.286533  4.288406e-01        1.000000\n",
      "country[T.Russia]     0.523685  1.253750  0.810290  3.448818e-01        1.000000\n",
      "country[T.Serbia]     0.830358  2.641579  1.481033  1.834230e-01        1.000000\n",
      "country[T.Thailand]   0.155005  6.234580  0.983051  9.855292e-01        1.000000\n",
      "country[T.UK]         0.517145  1.406065  0.852724  5.323807e-01        1.000000\n",
      "tobacco_ever[T.Yes]   1.578833  2.931164  2.151237  1.213951e-06        0.000339\n",
      "age_group             1.140791  1.489659  1.303607  9.816008e-05        0.027387\n",
      "Running logistic regression with parameter fam_rcc, signature DBS4\n",
      "** Warning: Covariate country, sig DBS4, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS4: 738\n",
      "All counts for signature DBS4: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.313060  2.553201  1.004849  8.569653e-01        1.000000\n",
      "sex[T.Male]           0.501804  1.391465  0.832821  4.713185e-01        1.000000\n",
      "country[T.Brazil]     0.285509  1.546292  0.693529  3.682253e-01        1.000000\n",
      "country[T.Canada]     0.324742  2.461988  0.966862  9.331967e-01        1.000000\n",
      "country[T.Japan]      0.097046  1.662720  0.499481  2.744425e-01        1.000000\n",
      "country[T.Lithuania]  0.924654  9.008551  3.031857  6.388421e-02        1.000000\n",
      "country[T.Romania]    0.115723  1.289842  0.447797  1.396804e-01        1.000000\n",
      "country[T.Russia]     0.353843  1.400242  0.709035  3.049644e-01        1.000000\n",
      "country[T.Serbia]     0.054421  0.894415  0.276223  2.932204e-02        1.000000\n",
      "country[T.Thailand]   0.003637  4.671537  0.487174  5.748747e-01        1.000000\n",
      "country[T.UK]         0.251904  1.358515  0.610831  2.274916e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.521960  1.474110  0.878867  6.055332e-01        1.000000\n",
      "age_group             0.965466  1.506459  1.202251  9.842863e-02        1.000000\n",
      "Intercept             0.049775  0.256022  0.116065  1.988649e-08        0.000006\n",
      "Running logistic regression with parameter fam_rcc, signature DBS9\n",
      "** Warning: Covariate country, sig DBS9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS9: 780\n",
      "All counts for signature DBS9: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.744669   6.748351  2.485343  1.237435e-01        1.000000\n",
      "sex[T.Male]           0.608241   2.846735  1.287059  4.888474e-01        1.000000\n",
      "country[T.Brazil]     0.243729   2.493750  0.845597  6.923566e-01        1.000000\n",
      "country[T.Canada]     0.331723   4.371169  1.356521  6.356762e-01        1.000000\n",
      "country[T.Japan]      0.001784   1.890996  0.231862  2.006750e-01        1.000000\n",
      "country[T.Lithuania]  0.176875   8.316055  1.727448  5.227565e-01        1.000000\n",
      "country[T.Romania]    0.001174   1.214256  0.151992  8.064455e-02        1.000000\n",
      "country[T.Russia]     0.241576   1.785376  0.673175  3.985087e-01        1.000000\n",
      "country[T.Serbia]     0.230026   3.024746  0.940166  7.569398e-01        1.000000\n",
      "country[T.Thailand]   0.011647  16.381687  1.585822  6.777969e-01        1.000000\n",
      "country[T.UK]         0.080434   1.496628  0.423474  1.852899e-01        1.000000\n",
      "tobacco_ever[T.Yes]   0.366135   1.659756  0.784233  4.984257e-01        1.000000\n",
      "age_group             0.765710   1.450590  1.049818  7.047148e-01        1.000000\n",
      "Intercept             0.014544   0.154313  0.050650  9.664232e-09        0.000003\n",
      "Running logistic regression with parameter fam_rcc, signature DBS78C\n",
      "** Warning: Covariate country, sig DBS78C, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78C: 734\n",
      "All counts for signature DBS78C: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.616055   3.736310  1.630804  2.983231e-01    1.000000e+00\n",
      "sex[T.Male]           1.004636   2.897198  1.681270  4.734712e-02    1.000000e+00\n",
      "country[T.Brazil]     0.517950   2.842960  1.245980  5.950458e-01    1.000000e+00\n",
      "country[T.Canada]     0.719267   4.741765  1.922279  1.834104e-01    1.000000e+00\n",
      "country[T.Japan]      0.123618   2.224241  0.646348  5.019087e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.116705   5.039098  1.113247  8.228463e-01    1.000000e+00\n",
      "country[T.Romania]    0.271705   2.537417  0.914982  8.220297e-01    1.000000e+00\n",
      "country[T.Russia]     0.521009   2.272789  1.088718  7.539655e-01    1.000000e+00\n",
      "country[T.Serbia]     0.432449   3.045456  1.211504  6.687881e-01    1.000000e+00\n",
      "country[T.Thailand]   0.008541  11.266467  1.147860  8.582828e-01    1.000000e+00\n",
      "country[T.UK]         0.819609   3.665217  1.738319  1.458930e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.711271   1.916110  1.163748  5.387314e-01    1.000000e+00\n",
      "age_group             0.928315   1.429662  1.149473  2.002928e-01    1.000000e+00\n",
      "Intercept             0.018574   0.105648  0.045816  1.633667e-15    4.557930e-13\n",
      "Running logistic regression with parameter fam_rcc, signature DBS78D\n",
      "** Warning: Covariate country, sig DBS78D, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS78D: 760\n",
      "All counts for signature DBS78D: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%         OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.087866   3.837577   0.851058  7.313334e-01    1.000000e+00\n",
      "sex[T.Male]           0.420702   1.584850   0.815721  5.150653e-01    1.000000e+00\n",
      "country[T.Brazil]     0.057412   2.939664   0.569020  4.669911e-01    1.000000e+00\n",
      "country[T.Canada]     0.367796   9.315403   2.124799  3.598555e-01    1.000000e+00\n",
      "country[T.Japan]      0.135045   7.285960   1.362421  6.386721e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.255777  15.040819   2.657991  3.143149e-01    1.000000e+00\n",
      "country[T.Romania]    9.740019  77.664165  25.374202  4.463203e-13    1.245234e-10\n",
      "country[T.Russia]     0.222938   3.188864   0.864119  5.785678e-01    1.000000e+00\n",
      "country[T.Serbia]     2.688552  24.537170   7.670746  1.216554e-04    3.394186e-02\n",
      "country[T.Thailand]   0.012855  21.007737   1.798832  6.156889e-01    1.000000e+00\n",
      "country[T.UK]         0.147857   3.616184   0.842468  6.940237e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.323879   1.309674   0.657615  2.233657e-01    1.000000e+00\n",
      "age_group             1.134679   2.043512   1.509634  4.160741e-03    1.000000e+00\n",
      "Intercept             0.004160   0.053047   0.016187  2.221721e-15    6.198601e-13\n",
      "Running logistic regression with parameter fam_rcc, signature ID1\n",
      "Zero counts for signature ID1: 406\n",
      "All counts for signature ID1: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             1106.7347 \n",
      "Link Function:         Logit             BIC:             -4259.7849\n",
      "Dependent Variable:    ID1_bool          Log-Likelihood:  -539.37   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -562.14   \n",
      "No. Observations:      811               Deviance:        1078.7    \n",
      "Df Model:              13                Pearson chi2:    811.      \n",
      "Df Residuals:          797               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -0.6843   0.2490 -2.7483 0.0060 -1.1723 -0.1963\n",
      "fam_rcc[T.Yes]        0.5776   0.3311  1.7445 0.0811 -0.0713  1.2266\n",
      "sex[T.Male]           0.2297   0.1546  1.4855 0.1374 -0.0734  0.5327\n",
      "country[T.Brazil]     0.1369   0.2588  0.5291 0.5968 -0.3703  0.6441\n",
      "country[T.Canada]     0.8014   0.3497  2.2915 0.0219  0.1159  1.4869\n",
      "country[T.Japan]      0.1529   0.3747  0.4081 0.6832 -0.5815  0.8873\n",
      "country[T.Lithuania] -0.7391   0.5678 -1.3015 0.1931 -1.8520  0.3739\n",
      "country[T.Romania]   -0.3772   0.3194 -1.1807 0.2377 -1.0033  0.2489\n",
      "country[T.Russia]     0.4818   0.2166  2.2244 0.0261  0.0573  0.9064\n",
      "country[T.Serbia]    -0.2707   0.2987 -0.9064 0.3647 -0.8562  0.3147\n",
      "country[T.Thailand]   1.3315   1.1356  1.1726 0.2410 -0.8942  3.5572\n",
      "country[T.UK]        -0.0603   0.2511 -0.2401 0.8102 -0.5524  0.4318\n",
      "tobacco_ever[T.Yes]  -0.3220   0.1537 -2.0942 0.0362 -0.6233 -0.0206\n",
      "age_group             0.2761   0.0658  4.1937 0.0000  0.1471  0.4052\n",
      "====================================================================\n",
      "\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "Intercept             0.309644   0.821764  0.504435  0.005990        1.000000\n",
      "fam_rcc[T.Yes]        0.931154   3.409515  1.781792  0.081067        1.000000\n",
      "sex[T.Male]           0.929268   1.703504  1.258178  0.137416        1.000000\n",
      "country[T.Brazil]     0.690557   1.904192  1.146714  0.596758        1.000000\n",
      "country[T.Canada]     1.122934   4.423180  2.228663  0.021935        1.000000\n",
      "country[T.Japan]      0.559058   2.428554  1.165205  0.683235        1.000000\n",
      "country[T.Lithuania]  0.156922   1.453369  0.477561  0.193076        1.000000\n",
      "country[T.Romania]    0.366678   1.282643  0.685797  0.237712        1.000000\n",
      "country[T.Russia]     1.058948   2.475438  1.619062  0.026124        1.000000\n",
      "country[T.Serbia]     0.424780   1.369852  0.762814  0.364728        1.000000\n",
      "country[T.Thailand]   0.408948  35.066189  3.786850  0.240974        1.000000\n",
      "country[T.UK]         0.575561   1.540058  0.941487  0.810223        1.000000\n",
      "tobacco_ever[T.Yes]   0.536181   0.979568  0.724725  0.036239        1.000000\n",
      "age_group             1.158438   1.499552  1.318005  0.000027        0.007657\n",
      "Running logistic regression with parameter fam_rcc, signature ID2\n",
      "** Warning: Covariate country, sig ID2, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID2: 797\n",
      "All counts for signature ID2: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.208306   9.114034  1.995315  0.376410        1.000000\n",
      "sex[T.Male]           0.359070   3.360812  1.051476  0.576330        1.000000\n",
      "country[T.Brazil]     0.001530   1.993175  0.204644  0.158555        1.000000\n",
      "country[T.Canada]     0.003469   4.486722  0.463210  0.559063        1.000000\n",
      "country[T.Japan]      0.564433  16.881812  3.427441  0.129638        1.000000\n",
      "country[T.Lithuania]  0.009693  13.898430  1.319099  0.517608        1.000000\n",
      "country[T.Romania]    0.110792   6.456776  1.134898  0.509413        1.000000\n",
      "country[T.Russia]     0.202176   3.466932  0.834343  0.509782        1.000000\n",
      "country[T.Serbia]     0.002124   2.812886  0.284988  0.246246        1.000000\n",
      "country[T.Thailand]   0.026627  49.684966  3.818771  0.332584        1.000000\n",
      "country[T.UK]         0.330323   6.533525  1.526283  0.409649        1.000000\n",
      "tobacco_ever[T.Yes]   0.213456   1.979650  0.661413  0.352647        1.000000\n",
      "age_group             0.530892   1.333786  0.838971  0.353253        1.000000\n",
      "Intercept             0.007552   0.181129  0.042492  0.000002        0.000644\n",
      "Running logistic regression with parameter fam_rcc, signature ID3\n",
      "** Warning: Covariate country, sig ID3, perfect or near-perfect separation for category Lithuania. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID3: 776\n",
      "All counts for signature ID3: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.848904   7.844033  2.852510  8.157129e-02    1.000000e+00\n",
      "sex[T.Male]           0.421797   1.809072  0.863293  6.211002e-01    1.000000e+00\n",
      "country[T.Brazil]     0.239977   3.583823  1.019175  7.449804e-01    1.000000e+00\n",
      "country[T.Canada]     0.240155   5.270640  1.330507  7.045223e-01    1.000000e+00\n",
      "country[T.Japan]      0.314876   7.225058  1.773329  4.336959e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.006305   7.678016  0.837579  6.976648e-01    1.000000e+00\n",
      "country[T.Romania]    2.084948  17.260749  5.868078  8.888681e-04    2.479942e-01\n",
      "country[T.Russia]     0.335200   3.237534  1.051716  7.562476e-01    1.000000e+00\n",
      "country[T.Serbia]     0.066934   3.131269  0.649934  5.655180e-01    1.000000e+00\n",
      "country[T.Thailand]   0.825719  63.269386  9.274560  6.231190e-02    1.000000e+00\n",
      "country[T.UK]         0.307882   3.760482  1.133928  7.146158e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.746618   3.306514  1.551390  2.280367e-01    1.000000e+00\n",
      "age_group             0.844638   1.580146  1.149725  3.558487e-01    1.000000e+00\n",
      "Intercept             0.006224   0.073653  0.023042  3.856140e-13    1.075863e-10\n",
      "Running logistic regression with parameter fam_rcc, signature ID5\n",
      "Zero counts for signature ID5: 406\n",
      "All counts for signature ID5: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             947.5810  \n",
      "Link Function:         Logit             BIC:             -4418.9386\n",
      "Dependent Variable:    ID5_bool          Log-Likelihood:  -459.79   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -562.14   \n",
      "No. Observations:      811               Deviance:        919.58    \n",
      "Df Model:              13                Pearson chi2:    823.      \n",
      "Df Residuals:          797               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -1.6884   0.2891 -5.8411 0.0000 -2.2550 -1.1219\n",
      "fam_rcc[T.Yes]        0.1597   0.3580  0.4462 0.6555 -0.5419  0.8614\n",
      "sex[T.Male]           0.5397   0.1727  3.1257 0.0018  0.2013  0.8781\n",
      "country[T.Brazil]    -1.0171   0.2910 -3.4949 0.0005 -1.5875 -0.4467\n",
      "country[T.Canada]    -0.5867   0.3694 -1.5883 0.1122 -1.3108  0.1373\n",
      "country[T.Japan]     -2.8911   0.5142 -5.6224 0.0000 -3.8989 -1.8833\n",
      "country[T.Lithuania] -0.3802   0.5642 -0.6739 0.5004 -1.4861  0.7257\n",
      "country[T.Romania]    0.6989   0.3839  1.8205 0.0687 -0.0536  1.4514\n",
      "country[T.Russia]    -0.4479   0.2353 -1.9033 0.0570 -0.9091  0.0133\n",
      "country[T.Serbia]    -0.0361   0.3222 -0.1121 0.9107 -0.6675  0.5953\n",
      "country[T.Thailand]  -2.3748   1.1676 -2.0338 0.0420 -4.6633 -0.0863\n",
      "country[T.UK]        -0.3886   0.2749 -1.4134 0.1575 -0.9275  0.1503\n",
      "tobacco_ever[T.Yes]   0.0425   0.1698  0.2502 0.8024 -0.2903  0.3753\n",
      "age_group             0.8807   0.0832 10.5893 0.0000  0.7177  1.0437\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.104878  0.325671  0.184813  5.186499e-09    1.447033e-06\n",
      "fam_rcc[T.Yes]        0.581613  2.366540  1.173206  6.554627e-01    1.000000e+00\n",
      "sex[T.Male]           1.222973  2.406345  1.715487  1.773608e-03    4.948367e-01\n",
      "country[T.Brazil]     0.204443  0.639737  0.361649  4.742055e-04    1.323033e-01\n",
      "country[T.Canada]     0.269603  1.147185  0.556134  1.122244e-01    1.000000e+00\n",
      "country[T.Japan]      0.020264  0.152092  0.055515  1.882932e-08    5.253382e-06\n",
      "country[T.Lithuania]  0.226247  2.066124  0.683706  5.003952e-01    1.000000e+00\n",
      "country[T.Romania]    0.947853  4.269138  2.011595  6.868754e-02    1.000000e+00\n",
      "country[T.Russia]     0.402901  1.013415  0.638988  5.699716e-02    1.000000e+00\n",
      "country[T.Serbia]     0.512975  1.813580  0.964531  9.107461e-01    1.000000e+00\n",
      "country[T.Thailand]   0.009435  0.917357  0.093036  4.196785e-02    1.000000e+00\n",
      "country[T.UK]         0.395556  1.162148  0.678008  1.575375e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.748018  1.455421  1.043399  8.024408e-01    1.000000e+00\n",
      "age_group             2.049714  2.839756  2.412610  3.339359e-26    9.316811e-24\n",
      "Running logistic regression with parameter fam_rcc, signature ID8\n",
      "Zero counts for signature ID8: 222\n",
      "All counts for signature ID8: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             885.5452  \n",
      "Link Function:         Logit             BIC:             -4480.9744\n",
      "Dependent Variable:    ID8_bool          Log-Likelihood:  -428.77   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -476.01   \n",
      "No. Observations:      811               Deviance:        857.55    \n",
      "Df Model:              13                Pearson chi2:    799.      \n",
      "Df Residuals:          797               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept             0.0463   0.2873  0.1611 0.8721 -0.5169  0.6094\n",
      "fam_rcc[T.Yes]       -0.0124   0.3864 -0.0321 0.9744 -0.7697  0.7449\n",
      "sex[T.Male]           0.4207   0.1765  2.3835 0.0171  0.0748  0.7667\n",
      "country[T.Brazil]    -0.9083   0.3023 -3.0047 0.0027 -1.5008 -0.3158\n",
      "country[T.Canada]    -0.4485   0.4062 -1.1042 0.2695 -1.2446  0.3476\n",
      "country[T.Japan]     -1.6941   0.4183 -4.0503 0.0001 -2.5139 -0.8743\n",
      "country[T.Lithuania]  0.3243   0.7958  0.4076 0.6836 -1.2354  1.8841\n",
      "country[T.Romania]   -0.7977   0.3710 -2.1500 0.0316 -1.5248 -0.0705\n",
      "country[T.Russia]    -0.1937   0.2661 -0.7278 0.4668 -0.7152  0.3279\n",
      "country[T.Serbia]    -0.8155   0.3333 -2.4466 0.0144 -1.4689 -0.1622\n",
      "country[T.Thailand]  -1.4231   0.9572 -1.4868 0.1371 -3.2991  0.4529\n",
      "country[T.UK]        -0.4577   0.3138 -1.4585 0.1447 -1.0727  0.1574\n",
      "tobacco_ever[T.Yes]   0.1272   0.1771  0.7183 0.4726 -0.2199  0.4744\n",
      "age_group             0.6026   0.0788  7.6423 0.0000  0.4481  0.7571\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.596375  1.839395  1.047363  8.720526e-01    1.000000e+00\n",
      "fam_rcc[T.Yes]        0.463154  2.106205  0.987672  9.743891e-01    1.000000e+00\n",
      "sex[T.Male]           1.077633  2.152688  1.523092  1.714776e-02    1.000000e+00\n",
      "country[T.Brazil]     0.222960  0.729201  0.403215  2.658632e-03    7.417584e-01\n",
      "country[T.Canada]     0.288067  1.415664  0.638596  2.695190e-01    1.000000e+00\n",
      "country[T.Japan]      0.080949  0.417140  0.183758  5.115252e-05    1.427155e-02\n",
      "country[T.Lithuania]  0.290714  6.580508  1.383128  6.835910e-01    1.000000e+00\n",
      "country[T.Romania]    0.217654  0.931933  0.450377  3.155778e-02    1.000000e+00\n",
      "country[T.Russia]     0.489089  1.388042  0.823939  4.667621e-01    1.000000e+00\n",
      "country[T.Serbia]     0.230188  0.850261  0.442402  1.442117e-02    1.000000e+00\n",
      "country[T.Thailand]   0.036915  1.572943  0.240967  1.370797e-01    1.000000e+00\n",
      "country[T.UK]         0.342087  1.170417  0.632760  1.447061e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.802577  1.607051  1.135686  4.725518e-01    1.000000e+00\n",
      "age_group             1.565259  2.132162  1.826851  2.133821e-14    5.953360e-12\n",
      "Running logistic regression with parameter fam_rcc, signature ID9\n",
      "** Warning: Covariate country, sig ID9, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter fam_rcc, sig ID9, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID9: 807\n",
      "All counts for signature ID9: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%       97.5%        OR   p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.007476    9.767834  1.002152  0.705483        1.000000\n",
      "sex[T.Male]           0.184124   12.084670  1.174235  0.641466        1.000000\n",
      "country[T.Brazil]     0.003028    5.319124  0.420921  0.430642        1.000000\n",
      "country[T.Canada]     0.193701   16.796902  2.168654  0.470416        1.000000\n",
      "country[T.Japan]      0.005181    9.817273  0.735727  0.571396        1.000000\n",
      "country[T.Lithuania]  0.015039   29.769831  2.144926  0.453150        1.000000\n",
      "country[T.Romania]    0.004770    8.512997  0.664915  0.506565        1.000000\n",
      "country[T.Russia]     0.001640    3.315037  0.235314  0.218993        1.000000\n",
      "country[T.Serbia]     0.158460   15.597119  1.865029  0.465630        1.000000\n",
      "country[T.Thailand]   0.045324  120.911512  6.873750  0.272812        1.000000\n",
      "country[T.UK]         0.002082    3.760437  0.291233  0.322400        1.000000\n",
      "tobacco_ever[T.Yes]   0.322229   23.350254  2.130612  0.407556        1.000000\n",
      "age_group             0.553665    3.221484  1.253889  0.538313        1.000000\n",
      "Intercept             0.000129    0.088375  0.005869  0.000010        0.002688\n",
      "Running logistic regression with parameter fam_rcc, signature ID11\n",
      "** Warning: Covariate country, sig ID11, perfect or near-perfect separation for category Romania. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter fam_rcc, sig ID11, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID11: 804\n",
      "All counts for signature ID11: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%        OR   p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.009367  11.167427  1.238834  0.653892        1.000000\n",
      "sex[T.Male]           0.153419   3.134423  0.719549  0.591031        1.000000\n",
      "country[T.Brazil]     0.002606   4.779581  0.366166  0.371482        1.000000\n",
      "country[T.Canada]     0.261840  23.762570  2.972751  0.312636        1.000000\n",
      "country[T.Japan]      0.008595  15.836277  1.207188  0.574474        1.000000\n",
      "country[T.Lithuania]  0.011362  22.076567  1.616612  0.489408        1.000000\n",
      "country[T.Romania]    0.003897   6.953331  0.543203  0.417339        1.000000\n",
      "country[T.Russia]     0.186756   7.119054  1.068012  0.507310        1.000000\n",
      "country[T.Serbia]     0.003616   6.451376  0.505326  0.465577        1.000000\n",
      "country[T.Thailand]   0.020924  52.156346  3.139743  0.384843        1.000000\n",
      "country[T.UK]         0.110998   9.685025  1.244049  0.572717        1.000000\n",
      "tobacco_ever[T.Yes]   0.026811   1.447804  0.268516  0.120727        1.000000\n",
      "age_group             0.620017   2.377992  1.173901  0.513789        1.000000\n",
      "Intercept             0.001398   0.176053  0.021541  0.000060        0.016723\n",
      "Running logistic regression with parameter fam_rcc, signature ID12\n",
      "** Warning: Covariate country, sig ID12, perfect or near-perfect separation for category Canada. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter fam_rcc, sig ID12, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID12: 804\n",
      "All counts for signature ID12: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%         97.5%         OR   p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.012254     16.390424   1.656695  0.559788        1.000000\n",
      "sex[T.Male]           0.125311      2.353798   0.566594  0.304533        1.000000\n",
      "country[T.Brazil]     0.841304   1296.390473   9.449685  0.056797        1.000000\n",
      "country[T.Canada]     0.019075    662.018635   3.555419  0.496130        1.000000\n",
      "country[T.Japan]      0.032359   1193.796903   6.185868  0.252929        1.000000\n",
      "country[T.Lithuania]  0.067718   2610.257867  13.207775  0.183870        1.000000\n",
      "country[T.Romania]    0.015583    557.879513   2.954686  0.301889        1.000000\n",
      "country[T.Russia]     0.365347    557.393785   4.052838  0.233689        1.000000\n",
      "country[T.Serbia]     0.008894    322.320481   1.689854  0.370964        1.000000\n",
      "country[T.Thailand]   0.207989  10219.855729  45.601004  0.079133        1.000000\n",
      "country[T.UK]         0.296644    873.605665   5.836301  0.164446        1.000000\n",
      "tobacco_ever[T.Yes]   0.124715      3.047144   0.697578  0.418192        1.000000\n",
      "age_group             0.200636      0.897558   0.453576  0.018484        1.000000\n",
      "Intercept             0.000139      0.216760   0.019837  0.000183        0.051162\n",
      "Running logistic regression with parameter fam_rcc, signature ID83C\n",
      "** Warning: Covariate country, sig ID83C, perfect or near-perfect separation for category  Czechia. Using the penalised approach (Firth method).\n",
      "** Warning: Parameter fam_rcc, sig ID83C, perfect or near-perfect separation for category Yes. Using the penalised approach (Firth method).\n",
      "Zero counts for signature ID83C: 795\n",
      "All counts for signature ID83C: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                           2.5%         97.5%         OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]         0.010280     14.785320   1.405436  6.727417e-01    1.000000e+00\n",
      "sex[T.Male]            0.319314      3.146599   0.986180  7.483787e-01    1.000000e+00\n",
      "country[T.Brazil]      0.011846    407.897136   2.198485  4.764081e-01    1.000000e+00\n",
      "country[T.Canada]      0.717483   2032.416833  13.792810  7.885822e-02    1.000000e+00\n",
      "country[T.Japan]       0.024124    847.539055   4.522716  3.629665e-01    1.000000e+00\n",
      "country[T.Lithuania]   0.049463   1779.491181   9.379650  2.177695e-01    1.000000e+00\n",
      "country[T.Romania]    12.117743  12527.860400  96.705277  1.107463e-08    3.089822e-06\n",
      "country[T.Russia]      0.005978    209.595312   1.119255  5.256152e-01    1.000000e+00\n",
      "country[T.Serbia]      2.141038   3062.726226  22.662619  6.551934e-03    1.000000e+00\n",
      "country[T.Thailand]    0.104235   4199.044982  20.899220  1.499292e-01    1.000000e+00\n",
      "country[T.UK]          0.009785    339.135410   1.821500  5.790367e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]    0.205979      2.184990   0.697316  4.867849e-01    1.000000e+00\n",
      "age_group              0.989904      2.755339   1.600335  5.121567e-02    1.000000e+00\n",
      "Intercept              0.000008      0.014224   0.001155  1.057069e-13    2.949222e-11\n",
      "Running logistic regression with parameter fam_rcc, signature SBS_burden\n",
      "** Warning: Covariate country, sig SBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature SBS_burden: 406\n",
      "All counts for signature SBS_burden: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                          2.5%      97.5%         OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.765079   3.256813   1.563569  2.199772e-01    1.000000e+00\n",
      "sex[T.Male]           1.518552   3.091131   2.159071  1.552124e-05    4.330427e-03\n",
      "country[T.Brazil]     0.365253   1.168112   0.655182  1.510675e-01    1.000000e+00\n",
      "country[T.Canada]     0.271171   1.220043   0.577819  1.503131e-01    1.000000e+00\n",
      "country[T.Japan]      0.210403   1.169491   0.495197  1.079776e-01    1.000000e+00\n",
      "country[T.Lithuania]  0.345834   3.261270   1.046117  8.602116e-01    1.000000e+00\n",
      "country[T.Romania]    4.706987  34.469172  11.879423  1.142983e-08    3.188923e-06\n",
      "country[T.Russia]     0.564973   1.465838   0.909651  6.544966e-01    1.000000e+00\n",
      "country[T.Serbia]     1.011064   3.790235   1.945986  4.624436e-02    1.000000e+00\n",
      "country[T.Thailand]   0.000314   0.447055   0.043004  5.174961e-03    1.000000e+00\n",
      "country[T.UK]         0.582184   1.758486   1.010764  9.382424e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.818968   1.637817   1.157594  4.057628e-01    1.000000e+00\n",
      "age_group             2.513418   3.612557   2.996684  5.995066e-45    1.672623e-42\n",
      "Intercept             0.032696   0.115152   0.062293  2.594770e-21    7.239408e-19\n",
      "Running logistic regression with parameter fam_rcc, signature DBS_burden\n",
      "** Warning: Covariate country, sig DBS_burden, perfect or near-perfect separation for category Thailand. Using the penalised approach (Firth method).\n",
      "Zero counts for signature DBS_burden: 449\n",
      "All counts for signature DBS_burden: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/Users/senkins/miniforge3/envs/RCC_analysis/lib/python3.9/site-packages/sklearn/utils/validation.py:1111: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tests =  279\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "fam_rcc[T.Yes]        0.676473  2.659885  1.331939  4.058320e-01    1.000000e+00\n",
      "sex[T.Male]           1.047931  2.107054  1.483132  2.606653e-02    1.000000e+00\n",
      "country[T.Brazil]     0.331020  1.021499  0.583436  5.914162e-02    1.000000e+00\n",
      "country[T.Canada]     0.215834  0.920435  0.449687  2.866543e-02    1.000000e+00\n",
      "country[T.Japan]      0.079984  0.438974  0.191969  7.284550e-05    2.032389e-02\n",
      "country[T.Lithuania]  0.489754  4.824146  1.457781  4.976233e-01    1.000000e+00\n",
      "country[T.Romania]    1.610707  9.582111  3.753842  1.767954e-03    4.932593e-01\n",
      "country[T.Russia]     0.452980  1.184750  0.733059  2.005495e-01    1.000000e+00\n",
      "country[T.Serbia]     0.692732  2.448405  1.297329  4.168727e-01    1.000000e+00\n",
      "country[T.Thailand]   0.000310  0.412179  0.041887  3.620542e-03    1.000000e+00\n",
      "country[T.UK]         0.540280  1.760129  0.972454  9.214985e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.819684  1.634382  1.156695  4.072710e-01    1.000000e+00\n",
      "age_group             1.857487  2.574348  2.178161  5.927785e-25    1.653852e-22\n",
      "Intercept             0.106651  0.337240  0.191509  4.772615e-09    1.331560e-06\n",
      "Running logistic regression with parameter fam_rcc, signature ID_burden\n",
      "Zero counts for signature ID_burden: 403\n",
      "All counts for signature ID_burden: 811\n",
      "Covariates used: ['sex', 'age_group', 'country', 'tobacco_ever']\n",
      "Number of tests =  279\n",
      "                 Results: Generalized linear model\n",
      "====================================================================\n",
      "Model:                 GLM               AIC:             916.8923  \n",
      "Link Function:         Logit             BIC:             -4449.6273\n",
      "Dependent Variable:    ID_burden_bool    Log-Likelihood:  -444.45   \n",
      "Date:                  2024-02-08 17:23  LL-Null:         -562.13   \n",
      "No. Observations:      811               Deviance:        888.89    \n",
      "Df Model:              13                Pearson chi2:    810.      \n",
      "Df Residuals:          797               Scale:           1.0000    \n",
      "Method:                IRLS                                         \n",
      "--------------------------------------------------------------------\n",
      "                      Coef.  Std.Err.    z    P>|z|   [0.025  0.975]\n",
      "--------------------------------------------------------------------\n",
      "Intercept            -2.0035   0.3006 -6.6643 0.0000 -2.5928 -1.4143\n",
      "fam_rcc[T.Yes]        0.0942   0.3645  0.2585 0.7960 -0.6201  0.8085\n",
      "sex[T.Male]           0.7146   0.1781  4.0112 0.0001  0.3654  1.0637\n",
      "country[T.Brazil]    -0.8640   0.2946 -2.9324 0.0034 -1.4415 -0.2865\n",
      "country[T.Canada]    -0.8687   0.3807 -2.2819 0.0225 -1.6149 -0.1226\n",
      "country[T.Japan]     -2.7718   0.5006 -5.5368 0.0000 -3.7530 -1.7906\n",
      "country[T.Lithuania] -0.6341   0.5702 -1.1119 0.2662 -1.7517  0.4836\n",
      "country[T.Romania]    1.1841   0.4187  2.8278 0.0047  0.3634  2.0049\n",
      "country[T.Russia]    -0.2761   0.2398 -1.1513 0.2496 -0.7460  0.1939\n",
      "country[T.Serbia]    -0.0865   0.3292 -0.2629 0.7927 -0.7317  0.5587\n",
      "country[T.Thailand]  -2.4059   1.1775 -2.0431 0.0410 -4.7138 -0.0979\n",
      "country[T.UK]        -0.2895   0.2806 -1.0314 0.3024 -0.8395  0.2606\n",
      "tobacco_ever[T.Yes]  -0.0889   0.1733 -0.5131 0.6079 -0.4287  0.2508\n",
      "age_group             0.9921   0.0878 11.3004 0.0000  0.8201  1.1642\n",
      "====================================================================\n",
      "\n",
      "                          2.5%     97.5%        OR       p-value  p-value (corr)\n",
      "Intercept             0.074813  0.243098  0.134859  2.659077e-11    7.418824e-09\n",
      "fam_rcc[T.Yes]        0.537890  2.244582  1.098789  7.960243e-01    1.000000e+00\n",
      "sex[T.Male]           1.441103  2.897101  2.043286  6.041348e-05    1.685536e-02\n",
      "country[T.Brazil]     0.236569  0.750869  0.421464  3.363540e-03    9.384277e-01\n",
      "country[T.Canada]     0.198914  0.884658  0.419489  2.249646e-02    1.000000e+00\n",
      "country[T.Japan]      0.023448  0.166858  0.062550  3.080313e-08    8.594074e-06\n",
      "country[T.Lithuania]  0.173485  1.621840  0.530438  2.661640e-01    1.000000e+00\n",
      "country[T.Romania]    1.438202  7.425057  3.267833  4.687328e-03    1.000000e+00\n",
      "country[T.Russia]     0.474238  1.213972  0.758757  2.495937e-01    1.000000e+00\n",
      "country[T.Serbia]     0.481077  1.748330  0.917105  7.926507e-01    1.000000e+00\n",
      "country[T.Thailand]   0.008971  0.906698  0.090187  4.103835e-02    1.000000e+00\n",
      "country[T.UK]         0.431916  1.297708  0.748666  3.023518e-01    1.000000e+00\n",
      "tobacco_ever[T.Yes]   0.651354  1.285068  0.914896  6.078831e-01    1.000000e+00\n",
      "age_group             2.270638  3.203412  2.696996  1.305601e-29    3.642626e-27\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<Figure size 720x504 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# regressions: configuration shared by the per-parameter / per-signature models fitted below\n",
    "dataframe_to_regress = merged_sigs_with_metadata.copy()\n",
    "# create a log file (the handle stays open: the regression loop below writes to it)\n",
    "log_file = open('./logs/logit_%s_%s_%s.log' % (run_name, analysis, '_'.join(mutation_types)), 'w') \n",
    "\n",
    "# common parameters\n",
    "use_CIs = True  # parse the attribution confidence intervals; a lower bound of 0 zeroes the attribution\n",
    "regression_type = 'logistic'  # 'linear' or 'logistic'\n",
    "apply_bonferroni = True  # compare corrected (True) or raw (False) p-values with the thresholds below\n",
    "factorise_age_group = True  # replace age_group labels with integer codes (pd.factorize, sorted)\n",
    "factorise_stage = True  # same for stage\n",
    "factorise_parameter = False  # same for the tested parameter itself\n",
    "p_for_plotting = 0.05  # p-value threshold below which plots are produced\n",
    "p_for_printing = 1  # p-value threshold below which results are printed and stored\n",
    "max_iterations = 1000  # iteration cap passed to the model fits\n",
    "# linear regression parameters\n",
    "log_transform = True  # natural log of burdens and absolute attributions (zeros are set to 1 first)\n",
    "ranked_inverse_normal_tranform = False  # apply rank_INT to the outcome\n",
    "# logistic regression parameters\n",
    "strictly_zero_reference = False  # only CI == [0, 0] counts as zero; CI == [0, a>0] is discarded\n",
    "above_below_median = False  # force the above/below median outcome for every signature\n",
    "top_vs_bottom_quantile = False  # outcome = top vs bottom quartile of the absolute attribution\n",
    "\n",
    "if regression_type == 'linear':\n",
    "    regression_type_family = sm.families.Gaussian()\n",
    "    variable_suffix = ''\n",
    "elif regression_type == 'logistic':\n",
    "    regression_type_family = sm.families.Binomial()\n",
    "    variable_suffix = '_bool'\n",
    "else:\n",
    "    # fail fast: otherwise the family/suffix would be undefined, or stale from a previous run of this cell\n",
    "    log_file.close()\n",
    "    raise ValueError('Unsupported regression_type: %s (expected linear or logistic)' % regression_type)\n",
    "\n",
    "# one row per (risk factor, signature), filled in by the regression loop\n",
    "results_dataframe = pd.DataFrame(columns=['Risk factor', 'Signature', '2.5%', '97.5%', 'OR', 'p-value', 'p-value (corr)'])\n",
    "results_dataframe = results_dataframe.set_index(['Risk factor', 'Signature'])\n",
    "\n",
    "# fully adjusted model (all risk factors as confounders)\n",
    "# confounders_to_consider = ['sex', 'age_group', 'country', 'alcohol_ever', 'tobacco_ever', 'bmi_q', 'hypert', 'diabetes']\n",
    "# minimally adjusted model (only risk factors associating with signatures as confounders)\n",
    "confounders_to_consider = ['sex', 'age_group', 'country', 'tobacco_ever']#,'stage']\n",
    "# optional reference level per parameter, used as the Treatment() baseline in the model formula\n",
    "reference_categories = {\n",
    "}\n",
    "\n",
    "for parameter in parameters_for_regressions:\n",
    "    # confounders for this parameter (copied, so the shared list is never modified)\n",
    "    list_of_confounders = confounders_to_consider.copy()\n",
    "    if 'PFO' in parameter:\n",
    "        # recruitment period necessary to adjust for for PFAS variables\n",
    "        list_of_confounders.append('rec_period')\n",
    "        # diabetes info is missing for some cases, and collinearity issues with rec_period\n",
    "        if 'diabetes' in list_of_confounders:\n",
    "            list_of_confounders.remove('diabetes')\n",
    "    new_parameter = True # dummy variable for log outputs\n",
    "    # assemble the regression table: burden flags, burdens, absolute attributions, signatures, parameter\n",
    "    abs_columns = [sig + '_abs' for sig in signatures]\n",
    "    burden_columns = [mutation_type + '_burden' for mutation_type in mutation_types]\n",
    "    burden_bool_columns = [mutation_type + '_burden_bool' for mutation_type in mutation_types]\n",
    "    merged_table_for_regression = dataframe_to_regress[parameter].to_frame()\n",
    "    for column_group in (signatures, abs_columns, burden_columns, burden_bool_columns):\n",
    "        merged_table_for_regression = pd.merge(dataframe_to_regress[column_group], merged_table_for_regression, left_index=True, right_index=True)\n",
    "    if log_transform and regression_type == 'linear':\n",
    "        for column in burden_columns + abs_columns:\n",
    "            # zeros are set to 1 so that their logarithm is 0\n",
    "            merged_table_for_regression.loc[ merged_table_for_regression[column] == 0, column ] = 1\n",
    "            merged_table_for_regression[column] = np.log(merged_table_for_regression[column])\n",
    "    merged_table_for_plotting = merged_table_for_regression.copy()\n",
    "    # add country in plotting dataframe\n",
    "    if 'country' not in merged_table_for_plotting.columns:\n",
    "        merged_table_for_plotting = pd.merge(merged_table_for_plotting, dataframe_to_regress['country'].to_frame(), left_index=True, right_index=True)\n",
    "    if 'years' in parameter:\n",
    "        merged_table_for_regression[parameter] = pd.to_numeric(merged_table_for_regression[parameter])\n",
    "    # adding confounding variables (the parameter column is already in the table)\n",
    "    for confounder in list_of_confounders:\n",
    "        if confounder==parameter:\n",
    "            continue\n",
    "        merged_table_for_regression = pd.merge(merged_table_for_regression, dataframe_to_regress[confounder].to_frame(), left_index=True, right_index=True)\n",
    "    # textual placeholders for missing values become NaN in both tables\n",
    "    missing_labels = ['missing', 'Missing', 'Not applicable']\n",
    "    merged_table_for_regression = merged_table_for_regression.replace(missing_labels, np.nan)\n",
    "    merged_table_for_plotting = merged_table_for_plotting.replace(missing_labels, np.nan)\n",
    "    # regression needs the parameter and every confounder; plotting only needs the parameter\n",
    "    merged_table_for_regression.dropna(subset=[parameter]+list_of_confounders, inplace=True)\n",
    "    merged_table_for_plotting.dropna(subset=[parameter], inplace=True)\n",
    "    # derive, for every signature, the cleaned attribution and its binary outcome column '<signature>_bool'\n",
    "    for signature in signatures:\n",
    "        merged_table_for_regression[signature + '_bool'] = np.nan\n",
    "        merged_table_for_plotting[signature] = np.nan\n",
    "        for sample in merged_table_for_regression.index:\n",
    "            attribution = merged_table_for_regression.loc[sample, signature + '_abs']\n",
    "            if use_CIs:\n",
    "                # CI string: three whitespace-separated numbers (central, lower, upper) once '[', ',' and ']' are stripped\n",
    "                CI_string = merged_table_for_regression.loc[sample, signature]\n",
    "                if not pd.isnull(CI_string):\n",
    "                    CI_string = CI_string.translate({ord(i):None for i in '[,]'})\n",
    "                    central, lower_CI, upper_CI = [float(x) for x in CI_string.split()]\n",
    "                else:\n",
    "                    central, lower_CI, upper_CI = [0, 0, 0]\n",
    "                if strictly_zero_reference:\n",
    "                    if lower_CI==0 and upper_CI==0:\n",
    "                        attribution = 0\n",
    "                    elif lower_CI==0: # disregarding cases where CI = [0, a], a>0\n",
    "                        attribution = np.nan\n",
    "                else:\n",
    "                    if lower_CI==0:\n",
    "                        attribution = 0\n",
    "            merged_table_for_plotting.loc[sample, signature] = attribution\n",
    "            merged_table_for_regression.loc[sample, signature] = attribution\n",
    "            if attribution>0:\n",
    "                merged_table_for_regression.loc[sample, signature + '_bool'] = 1\n",
    "            elif attribution==0:\n",
    "                merged_table_for_regression.loc[sample, signature + '_bool'] = 0\n",
    "            else:\n",
    "                # undefined attribution (NaN): leave the outcome undefined\n",
    "                merged_table_for_regression.loc[sample, signature + '_bool'] = np.nan\n",
    "        if regression_type == 'logistic':\n",
    "            signature_frequency = merged_table_for_regression[signature + '_bool'].sum()/len(merged_table_for_regression.index)\n",
    "            if signature_frequency>=0.75 or above_below_median:\n",
    "                if not above_below_median:\n",
    "                    print('Using below/above median model for signature %s, its frequency is %.2f' % (signature, signature_frequency))\n",
    "                # computed once: the loop below only writes the '_bool' column, so the median cannot change\n",
    "                median = merged_table_for_regression[signature].median()\n",
    "                for sample in merged_table_for_regression.index:\n",
    "                    central = merged_table_for_regression.loc[sample, signature]\n",
    "                    if pd.isnull(central):\n",
    "                        # an undefined attribution must stay undefined instead of counting as above median\n",
    "                        merged_table_for_regression.loc[sample, signature + '_bool'] = np.nan\n",
    "                    elif central <= median:\n",
    "                        merged_table_for_regression.loc[sample, signature + '_bool'] = 0\n",
    "                    else:\n",
    "                        merged_table_for_regression.loc[sample, signature + '_bool'] = 1\n",
    "            if top_vs_bottom_quantile:\n",
    "                print('Using top vs bottom quantile for signature %s' % signature)\n",
    "                merged_table_for_regression[signature + '_quantile'] = pd.qcut(merged_table_for_regression[signature + '_abs'], 4, labels=False, duplicates = 'drop')\n",
    "                for sample in merged_table_for_regression.index:\n",
    "                    # bin 0 is the reference, bin 3 the exposed group; the bins in between are discarded\n",
    "                    if merged_table_for_regression.loc[sample, signature + '_quantile'] == 0:\n",
    "                        merged_table_for_regression.loc[sample, signature + '_bool'] = 0\n",
    "                    elif merged_table_for_regression.loc[sample, signature + '_quantile'] == 3:\n",
    "                        merged_table_for_regression.loc[sample, signature + '_bool'] = 1\n",
    "                    else:\n",
    "                        merged_table_for_regression.loc[sample, signature + '_bool'] = np.nan\n",
    "        merged_table_for_regression[signature + '_bool'] = pd.to_numeric(merged_table_for_regression[signature + '_bool'])\n",
    "        merged_table_for_regression[signature] = pd.to_numeric(merged_table_for_regression[signature])\n",
    "    # outcomes to model: every signature plus the total burden of each mutation type\n",
    "    signatures_to_run = signatures + [mutation_type + '_burden' for mutation_type in mutation_types]\n",
    "    for signature in signatures_to_run:\n",
    "        print('Running %s regression with parameter %s, signature %s' % (regression_type, parameter, signature))\n",
    "        if ranked_inverse_normal_tranform and regression_type == 'linear':\n",
    "            merged_table_for_regression[signature] = rank_INT(merged_table_for_regression[signature])\n",
    "            merged_table_for_plotting[signature] = rank_INT(merged_table_for_plotting[signature])\n",
    "        firth_method = False\n",
    "        if regression_type == 'logistic':\n",
    "            #automatic check for perfect/near perfect separation of data (standard Logit limitation)\n",
    "            # a category is flagged when fewer than two outcome values are observed in it\n",
    "            for covariate in list_of_confounders:\n",
    "                if covariate=='age_group' or covariate=='age_diag':\n",
    "                    continue\n",
    "                for category in merged_table_for_regression[covariate].unique():\n",
    "                    category_counts = merged_table_for_regression[merged_table_for_regression[covariate]==category][signature + '_bool'].value_counts()\n",
    "                    if category_counts.size <= 1 or category_counts.values[0]<1 or category_counts.values[-1]<1:\n",
    "                        print('** Warning: Covariate %s, sig %s, perfect or near-perfect separation for category %s. Using the penalised approach (Firth method).' % (covariate, signature, category))\n",
    "                        firth_method = True\n",
    "                        break\n",
    "            # same check for the tested parameter; the message names the parameter, not a stale covariate\n",
    "            for category in merged_table_for_regression[parameter].unique():\n",
    "                category_counts = merged_table_for_regression[merged_table_for_regression[parameter]==category][signature + '_bool'].value_counts()\n",
    "                if category_counts.size <= 1 or category_counts.values[0]<1 or category_counts.values[-1]<1:\n",
    "                    print('** Warning: Parameter %s, sig %s, perfect or near-perfect separation for category %s. Using the penalised approach (Firth method).' % (parameter, signature, category))\n",
    "                    firth_method = True\n",
    "                    break\n",
    "            print('Zero counts for signature %s: %i' %( signature , len(merged_table_for_regression.index)-merged_table_for_regression[signature + '_bool'].sum()))\n",
    "            print('All counts for signature %s: %i' % (signature, len(merged_table_for_regression.index)))\n",
    "        # covariates of this model: every confounder except the tested parameter itself\n",
    "        confounders = list_of_confounders.copy()\n",
    "        if parameter in confounders:\n",
    "            confounders.remove(parameter)\n",
    "        # bmi and obesity are never adjusted for each other\n",
    "        if 'bmi' in parameter and 'obesity' in confounders:\n",
    "            confounders.remove('obesity')\n",
    "        if 'obesity'==parameter and 'bmi_q' in confounders:\n",
    "            confounders.remove('bmi_q')\n",
    "        print('Covariates used:', confounders)\n",
    "        # formula suffix; left empty when no covariate remains so that the formula has no dangling '+'\n",
    "        confounders_string = ('+ ' + ' + '.join(confounders)) if confounders else ''\n",
    "        # replace labels with integer codes (sorted label order) so that the variable enters the model as one numeric term\n",
    "        if ('age_group' in confounders or parameter=='age_group') and factorise_age_group:\n",
    "            merged_table_for_regression['age_group'], _ = pd.factorize(merged_table_for_regression['age_group'], sort=True)\n",
    "        if ('stage' in confounders or parameter=='stage') and factorise_stage:\n",
    "            merged_table_for_regression['stage'], _ = pd.factorize(merged_table_for_regression['stage'], sort=True)\n",
    "        if 'bmi_q' in confounders or parameter=='bmi_q':\n",
    "            merged_table_for_regression['bmi_q'], _ = pd.factorize(merged_table_for_regression['bmi_q'], sort=True)\n",
    "        if 'PFO' in parameter or factorise_parameter:\n",
    "            merged_table_for_regression[parameter], _ = pd.factorize(merged_table_for_regression[parameter], sort=True)\n",
    "        # fit the model; both branches produce `conf` (2.5%, 97.5%, OR, p-value) and `pvalues_dataframe` (column 0)\n",
    "        if firth_method:\n",
    "            y, X = patsy.dmatrices(\"%s ~ %s %s\" % (signature + variable_suffix, parameter, confounders_string),\n",
    "                        data=merged_table_for_regression,\n",
    "                        return_type=\"dataframe\")\n",
    "            # the intercept column is dropped from the design matrix; fl.intercept_ is appended to the results below\n",
    "            X.drop('Intercept', inplace=True, axis=1)\n",
    "            try:\n",
    "                fl = FirthLogisticRegression(max_iter=max_iterations)\n",
    "                # y is a one-column dataframe: pass it as a 1d array to avoid sklearn's DataConversionWarning\n",
    "                fl.fit(X, y.values.ravel())\n",
    "                conf = pd.DataFrame(data=fl.ci_, index = list(X.columns) + ['Intercept'], columns = [0,1])\n",
    "                conf['OR'] = np.append(fl.coef_, fl.intercept_)\n",
    "                conf.columns = ['2.5%', '97.5%', 'OR']\n",
    "                # coefficients and interval bounds are on the log-odds scale\n",
    "                conf = np.exp(conf)\n",
    "                conf['p-value'] = fl.pvals_\n",
    "                pvalues_dataframe = conf['p-value'].to_frame()\n",
    "                pvalues_dataframe.rename(columns={'p-value':0}, inplace=True)\n",
    "            except Exception as e:\n",
    "                print('* WARNING: Firth regression for model %s ~ %s %s failed' % (signature + '_bool', parameter, confounders_string))\n",
    "                print(e)\n",
    "                continue\n",
    "        else: # standard regression\n",
    "            try:\n",
    "                if parameter in reference_categories.keys():\n",
    "                    # categorical parameter with an explicit reference level\n",
    "                    reference_category = reference_categories[parameter]\n",
    "                    formula_fit = smf.glm(\"%s ~ C(%s, Treatment('%s')) %s\"\n",
    "                                          % (signature + variable_suffix, parameter,\n",
    "                                             reference_category, confounders_string),\n",
    "                                          merged_table_for_regression,\n",
    "                                          family=regression_type_family).fit(maxiter=max_iterations)\n",
    "                else:\n",
    "                    formula_fit = smf.glm(\"%s ~ %s %s\" %\n",
    "                                          (signature + variable_suffix, parameter, confounders_string),\n",
    "                                          merged_table_for_regression,\n",
    "                                          family=regression_type_family).fit(maxiter=max_iterations)\n",
    "            except Exception as e:\n",
    "                print('* WARNING: Model %s ~ %s %s failed' % (signature + variable_suffix, parameter, confounders_string))\n",
    "                print(e)\n",
    "                continue\n",
    "            result = formula_fit\n",
    "            params = result.params\n",
    "            # ORs with confidence intervals\n",
    "            conf = result.conf_int()\n",
    "            conf['OR'] = params\n",
    "            conf.columns = ['2.5%', '97.5%', 'OR']\n",
    "            if regression_type == 'logistic':\n",
    "                # exponentiate log-odds into odds ratios; linear coefficients are kept as they are\n",
    "                conf = np.exp(conf)\n",
    "            pvalues_dataframe = result.pvalues.to_frame()\n",
    "            conf['p-value'] = pvalues_dataframe.copy()\n",
    "        # Bonferroni adjustment based on the number of signatures or mutation types\n",
    "        number_of_tests = (len(signatures)+len(mutation_types))*len(parameters_for_regressions)\n",
    "        print('Number of tests = ', number_of_tests)\n",
    "        corrected_pvalues_dataframe = pvalues_dataframe*number_of_tests\n",
    "        corrected_pvalues_dataframe[corrected_pvalues_dataframe > 1] = 1\n",
    "        conf['p-value (corr)'] = corrected_pvalues_dataframe\n",
    "        # minimum p-values over the model terms whose name contains the tested parameter\n",
    "        parameter_terms = pvalues_dataframe.index.str.contains(parameter)\n",
    "        corrected_pvalue = corrected_pvalues_dataframe[parameter_terms].min()[0]\n",
    "        uncorrected_pvalue = pvalues_dataframe[parameter_terms].min()[0]\n",
    "        preexisting_hypothesis = False\n",
    "        # the p-value compared with the thresholds depends on whether the correction is applied\n",
    "        pvalue_for_thresholds = corrected_pvalue if apply_bonferroni else uncorrected_pvalue\n",
    "        if pvalue_for_thresholds<=p_for_printing or preexisting_hypothesis:\n",
    "            if not firth_method:\n",
    "                print(formula_fit.summary2())\n",
    "            print(conf.to_string())\n",
    "            # fill the results dataframe with the most significant term of the tested parameter\n",
    "            significant_parameter_name = conf[conf.index.str.contains(parameter)]['p-value'].idxmin()\n",
    "            significant_parameter_results = conf.loc[significant_parameter_name]\n",
    "            # categorical terms are stored under the plain variable name; first match wins\n",
    "            grouped_variable_names = ['alcohol_ever', 'tobacco_ever', 'sex', 'region', 'city', 'country']\n",
    "            parameter_in_table_name = next((name for name in grouped_variable_names if name in significant_parameter_name), significant_parameter_name)\n",
    "            results_dataframe.loc[(parameter_in_table_name, signature),:] = significant_parameter_results\n",
    "            if not apply_bonferroni:\n",
    "                if corrected_pvalue<=0.05:\n",
    "                    print('This result passes Bonferroni correction.')\n",
    "                else:\n",
    "                    print('This result does not pass Bonferroni correction.')\n",
    "        # plots and log entry, only for results passing the plotting threshold\n",
    "        if (apply_bonferroni and corrected_pvalue<=p_for_plotting) or (not apply_bonferroni and uncorrected_pvalue<=p_for_plotting) or preexisting_hypothesis:\n",
    "            p = calculate_p_value(merged_table_for_plotting, parameter, signature)\n",
    "            p_value_string = latexify_p_value(p)\n",
    "            colormap_label = '%s' % signature\n",
    "            filename = 'boxplot_abs_mutations.pdf'\n",
    "            boxplot_filename = filename.replace('boxplot', parameter + '_' + signature)\n",
    "            parameter_latex = parameter.replace('_','-')\n",
    "            title = parameter_latex\n",
    "            # test name shown in the title depends on the number of parameter categories\n",
    "            if len(merged_table_for_plotting[parameter].unique())==2:\n",
    "                p_value_string += ' (Mann-Whitney test)'\n",
    "            else:\n",
    "                p_value_string += ' (Kruskal-Wallis test)'\n",
    "            title += ', ' + p_value_string\n",
    "            savepath = 'logit_results/' + run_name + '/' + parameter + '/' + boxplot_filename\n",
    "            savepath_OR = savepath.replace('.pdf', '_OR.pdf')\n",
    "            savepath_LM = savepath.replace('.pdf', '_LM.pdf')\n",
    "            try:\n",
    "                make_OR_plot(conf.drop('Intercept'), title='OR for %s' % (signature), savepath = savepath_OR)\n",
    "            except Exception as e:\n",
    "                print('Odd ratio plotting failed for %s: check your variables.' % savepath_OR)\n",
    "                print(e)\n",
    "            # values above 3000 are masked in the plotting table for SBS1 and ID1\n",
    "            if 'SBS1' == signature:\n",
    "                merged_table_for_plotting.loc[merged_table_for_plotting['SBS1']>3000, 'SBS1'] = np.nan\n",
    "            if 'ID1' == signature:\n",
    "                merged_table_for_plotting.loc[merged_table_for_plotting['ID1']>3000, 'ID1'] = np.nan\n",
    "            make_boxplot(merged_table_for_plotting, signature = signature, parameter = parameter, title = title, ylabel = colormap_label,\n",
    "                        relative = False, verbose=False, add_jitter = False, savepath = savepath)\n",
    "            # linear model plots across countries:\n",
    "            make_lm_plot(merged_table_for_plotting, signature = signature, parameter = parameter,\n",
    "                             col = 'country', title = title, ylabel = colormap_label,\n",
    "                             savepath = savepath_LM)\n",
    "            # write some logs: one headed, newline-terminated table per model so that entries do not run together\n",
    "            log_file.write('%s regression, parameter %s, signature %s\\n' % (regression_type, parameter, signature))\n",
    "            log_file.write(conf.to_string() + '\\n\\n')\n",
    "            # the handle is not closed in this cell, so push every entry to disk straight away\n",
    "            log_file.flush()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                     OR (95% CI)  p-value  p-value (corr)\n",
      "Risk factor           Signature                                          \n",
      "Age of diagnosis      SBS1         1.6 (1.4-1.8)  3.4e-12         9.6e-10\n",
      "                      SBS2         1.0 (0.5-2.1)  5.6e-01         1.0e+00\n",
      "                      SBS4         1.4 (1.2-1.6)  5.3e-08         1.5e-05\n",
      "                      SBS5         1.9 (1.5-2.4)  7.1e-07         2.0e-04\n",
      "                      SBS12        1.2 (0.9-1.7)  2.0e-01         1.0e+00\n",
      "...                                          ...      ...             ...\n",
      "Family history of RCC ID12        1.7 (0.0-16.4)  5.6e-01         1.0e+00\n",
      "                      ID23        1.4 (0.0-14.8)  6.7e-01         1.0e+00\n",
      "                      SBS burden   1.6 (0.8-3.3)  2.2e-01         1.0e+00\n",
      "                      DBS burden   1.3 (0.7-2.7)  4.1e-01         1.0e+00\n",
      "                      ID burden    1.1 (0.5-2.2)  8.0e-01         1.0e+00\n",
      "\n",
      "[279 rows x 3 columns]\n"
     ]
    }
   ],
   "source": [
    "results_dataframe_to_print = results_dataframe.copy()\n",
    "results_dataframe_to_print['2.5%'] = pd.to_numeric(results_dataframe_to_print['2.5%'], errors='coerce')\n",
    "results_dataframe_to_print['97.5%'] = pd.to_numeric(results_dataframe_to_print['97.5%'], errors='coerce')\n",
    "results_dataframe_to_print['OR'] = pd.to_numeric(results_dataframe_to_print['OR'], errors='coerce')\n",
    "pd.options.display.float_format = '{:.1e}'.format\n",
    "if len(results_dataframe_to_print.index)>0:\n",
    "    results_dataframe_to_print = results_dataframe_to_print.apply(lambda x: [f'{y:.1f}' for y in x] if 'p-value' not in x.name else x)\n",
    "    results_dataframe_to_print['p-value'] = results_dataframe_to_print['p-value'].astype(float)\n",
    "    results_dataframe_to_print['p-value (corr)'] = results_dataframe_to_print['p-value (corr)'].astype(float)\n",
    "    results_dataframe_to_print['OR (95% CI)'] = results_dataframe_to_print['OR'] + ' (' + results_dataframe_to_print['2.5%'] + '-' + results_dataframe_to_print['97.5%'] + ')'\n",
    "    results_dataframe_to_print.drop(['2.5%','97.5%','OR'], axis=1, inplace=True)\n",
    "    variables_not_to_show = ['country']\n",
    "    for variable in variables_not_to_show:\n",
    "        if variable in results_dataframe_to_print.index:\n",
    "            results_dataframe_to_print.drop([variable], axis=0, inplace=True)\n",
    "    columns = list(sorted(results_dataframe_to_print.columns.values))\n",
    "    results_dataframe_to_print = results_dataframe_to_print[columns]\n",
    "    results_dataframe_to_print['OR (95% CI)'] = results_dataframe_to_print['OR (95% CI)'].replace({'nan (nan-nan)':'-'})\n",
    "for mutation_type in mutation_types:\n",
    "    results_dataframe_to_print.rename(index={mutation_type + '_burden':mutation_type + ' burden'},inplace=True)\n",
    "results_dataframe_to_print.rename(index={'age_group':'Age of diagnosis'},inplace=True)\n",
    "results_dataframe_to_print.rename(index={'sex':'Sex'},inplace=True)\n",
    "results_dataframe_to_print.rename(index={'tobacco_ever':'Tobacco smoking'},inplace=True)\n",
    "results_dataframe_to_print.rename(index={'fam_rcc':'Family history of RCC','fam_rcc[T.Yes]':'Family history of RCC'},inplace=True)\n",
    "results_dataframe_to_print.rename(index={'bmi_q':'BMI'},inplace=True)\n",
    "results_dataframe_to_print.rename(index={'stage':'Stage'},inplace=True)\n",
    "results_dataframe_to_print.rename(index={'hypert':'Hypertension','hypert[T.Yes]':'Hypertension'},inplace=True)\n",
    "results_dataframe_to_print.rename(index={'diabetes':'Diabetes','diabetes[T.Yes]':'Diabetes'},inplace=True)\n",
    "results_dataframe_to_print.rename(index={'PFOS_q':'PFOS','PFOA_q':'PFOA'},inplace=True)\n",
    "if analysis=='COSMIC':\n",
    "    results_dataframe_to_print.rename(index={'SBS1536A':'SBS40b','SBS1536B':'SBS40a','SBS1536F':'SBS40c','SBS22':'SBS22a','SBS1536I':'SBS22b','DBS78D':'DBS20','ID83C':'ID23'},inplace=True)\n",
    "    for variable, _ in results_dataframe_to_print.groupby(level=0):\n",
    "        if 'SBS44' in results_dataframe_to_print.loc[variable].index:\n",
    "            results_dataframe_to_print.loc[(variable, 'SBS44'), :], results_dataframe_to_print.loc[(variable, 'SBS22b'), :] = results_dataframe_to_print.loc[(variable, 'SBS22b')].copy(), results_dataframe_to_print.loc[(variable, 'SBS44')].copy()\n",
    "            results_dataframe_to_print = results_dataframe_to_print.rename({'SBS44': 'SBS22b', 'SBS22b': 'SBS44'})\n",
    "        if 'SBS40a' in results_dataframe_to_print.loc[variable].index:\n",
    "            results_dataframe_to_print.loc[(variable, 'SBS40a'), :], results_dataframe_to_print.loc[(variable, 'SBS40b'), :] = results_dataframe_to_print.loc[(variable, 'SBS40b')].copy(), results_dataframe_to_print.loc[(variable, 'SBS40a')].copy()\n",
    "            results_dataframe_to_print = results_dataframe_to_print.rename({'SBS40a': 'SBS40b', 'SBS40b': 'SBS40a'}, axis='rows')\n",
    "        if 'DBS20' in results_dataframe_to_print.loc[variable].index:\n",
    "            results_dataframe_to_print.loc[(variable, 'DBS20'), :], results_dataframe_to_print.loc[(variable, 'DBS78C'), :] = results_dataframe_to_print.loc[(variable, 'DBS78C')].copy(), results_dataframe_to_print.loc[(variable, 'DBS20')].copy()\n",
    "            results_dataframe_to_print = results_dataframe_to_print.rename({'DBS20': 'DBS78C', 'DBS78C': 'DBS20'}, axis='rows')\n",
    "results_dataframe_to_print.rename(index={'DBS78A':'DBS_A','DBS78B':'DBS_B','DBS78C':'DBS_C','DBS78D':'DBS_D'},inplace=True)\n",
    "results_dataframe_to_print.rename(index={'ID83A':'ID_A','ID83B':'ID_B','ID83C':'ID_C','ID83D':'ID_D','ID83E':'ID_E','ID83F':'ID_F','ID83G':'ID_G'},inplace=True)\n",
    "results_dataframe_to_print.rename(index={'SBS1536A':'SBS_A','SBS1536B':'SBS_B','SBS1536C':'SBS_C','SBS1536D':'SBS_D','SBS1536E':'SBS_E','SBS1536F':'SBS_F','SBS1536G':'SBS_G','SBS1536H':'SBS_H','SBS1536I':'SBS_I','SBS1536J':'SBS_J','SBS1536K':'SBS_K','SBS1536L':'SBS_L','SBS1536M':'SBS_M'},inplace=True)\n",
    "results_dataframe_to_print.to_csv('./logit_tables/' + run_name + '_' + analysis + '_results.csv', float_format='%.1e')\n",
    "print(results_dataframe_to_print)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3e5fe3d8c2224b4d930bab027c3e411d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': False, 'defa…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "qgrid.show_grid(results_dataframe_to_print, grid_options={'forceFitColumns': False, 'defaultColumnWidth': 100})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
